From 256292c20d7b14cc2e2d97b1c39aba94b4dbb76d Mon Sep 17 00:00:00 2001
From: Daniel Ledda <daniel.ledda@stock3.com>
Date: Fri, 3 Jan 2025 19:21:18 +0100
Subject: [PATCH] Remove Zig port and CMake build; add djstdlib and build scripts

---
 .clangd                                |    4 +
 CMakeLists.txt                         |   98 -
 build                                  |    3 +
 build.bat                              |   27 +
 build.zig                              |   55 -
 lib/zmath/README.md                    |  138 -
 lib/zmath/build.zig                    |   97 -
 lib/zmath/src/benchmark.zig            |  469 ---
 lib/zmath/src/main.zig                 |   18 -
 lib/zmath/src/util.zig                 |  182 -
 lib/zmath/src/zmath.zig                | 4442 ------------------------
 src/SomaSolve.cpp                      |  366 +-
 src/SomaSolve.h                        |   11 +-
 src/VoxelSpace.cpp                     |  496 ++-
 src/VoxelSpace.h                       |   76 +-
 src/c.zig                              |    9 -
 src/gfx/Color.cpp                      |   11 +-
 src/gfx/Color.h                        |    4 +-
 src/gfx/Color.zig                      |   43 -
 src/gfx/Mesh.cpp                       |    2 +-
 src/gfx/Mesh.h                         |    6 +-
 src/gfx/Mesh.zig                       |   94 -
 src/gfx/OrbitControls.cpp              |    0
 src/gfx/OrbitControls.h                |   81 -
 src/gfx/Shader.cpp                     |   27 +-
 src/gfx/Shader.h                       |    2 +-
 src/gfx/Shader.zig                     |   56 -
 src/gfx/Texture.cpp                    |    8 +-
 src/gfx/Texture.h                      |    2 +-
 src/gfx/djleddaGeom.zig                |   57 -
 src/gfx/geometry.cpp                   |   80 +-
 src/gfx/geometry.h                     |   26 +-
 {lib/c => src/lib}/KHR/khrplatform.h   |    0
 src/lib/djstdlib/app.cpp               |   14 +
 src/lib/djstdlib/core.cpp              |  511 +++
 src/lib/djstdlib/core.h                |  219 ++
 src/lib/djstdlib/os.cpp                |   12 +
 src/lib/djstdlib/os.h                  |   12 +
 src/lib/djstdlib/os_linux.cpp          |   24 +
 src/lib/djstdlib/os_win32.cpp          |   21 +
 src/lib/djstdlib/vendor/stb_sprintf.h  | 1923 ++++++++++
 {lib/c => src/lib}/glad/glad.c         |    0
 {lib/c => src/lib}/glad/glad.h         |    0
 {lib/c => src/lib}/loaders/stb_image.h |    0
 {lib/c => src/lib}/loaders/tinyobj.h   |    0
 src/main.cpp                           |  174 +-
 src/main.zig                           |  402 ---
 47 files changed, 3401 insertions(+), 6901 deletions(-)
 create mode 100644 .clangd
 delete mode 100644 CMakeLists.txt
 create mode 100644 build
 create mode 100644 build.bat
 delete mode 100644 build.zig
 delete mode 100644 lib/zmath/README.md
 delete mode 100644 lib/zmath/build.zig
 delete mode 100644 lib/zmath/src/benchmark.zig
 delete mode 100644 lib/zmath/src/main.zig
 delete mode 100644 lib/zmath/src/util.zig
 delete mode 100644 lib/zmath/src/zmath.zig
 delete mode 100644 src/c.zig
 delete mode 100644 src/gfx/Color.zig
 delete mode 100644 src/gfx/Mesh.zig
 delete mode 100644 src/gfx/OrbitControls.cpp
 delete mode 100644 src/gfx/OrbitControls.h
 delete mode 100644 src/gfx/Shader.zig
 delete mode 100644 src/gfx/djleddaGeom.zig
 rename {lib/c => src/lib}/KHR/khrplatform.h (100%)
 create mode 100644 src/lib/djstdlib/app.cpp
 create mode 100644 src/lib/djstdlib/core.cpp
 create mode 100644 src/lib/djstdlib/core.h
 create mode 100644 src/lib/djstdlib/os.cpp
 create mode 100644 src/lib/djstdlib/os.h
 create mode 100644 src/lib/djstdlib/os_linux.cpp
 create mode 100644 src/lib/djstdlib/os_win32.cpp
 create mode 100644 src/lib/djstdlib/vendor/stb_sprintf.h
 rename {lib/c => src/lib}/glad/glad.c (100%)
 rename {lib/c => src/lib}/glad/glad.h (100%)
 rename {lib/c => src/lib}/loaders/stb_image.h (100%)
 rename {lib/c => src/lib}/loaders/tinyobj.h (100%)
 delete mode 100644 src/main.zig

diff --git a/.clangd b/.clangd
new file mode 100644
index 0000000..69eef2f
--- /dev/null
+++ b/.clangd
@@ -0,0 +1,4 @@
+CompileFlags:
+  Add:
+    - -DOS_LINUX  # editor-only define; the Linux build script passes -DOS_LINUX=1
+    - -I./        # each list item is a single argv entry, so no space may follow -I
diff --git a/CMakeLists.txt b/CMakeLists.txt
deleted file mode 100644
index 28b3018..0000000
--- a/CMakeLists.txt
+++ /dev/null
@@ -1,98 +0,0 @@
-cmake_minimum_required(VERSION 3.24)
-project(somaesque)
-
-set(VENDOR_DIR "${CMAKE_CURRENT_SOURCE_DIR}/vendor")
-set(SRC_DIR "${CMAKE_CURRENT_SOURCE_DIR}/src")
-
-set(CMAKE_EXPORT_COMPILE_COMMANDS true)
-set(CMAKE_BUILD_TYPE Release)
-set(CMAKE_CXX_FLAGS_RELEASE "-O2")
-set(CMAKE_CXX_STANDARD 20)
-
-option(GLFW_BUILD_DOCS OFF)
-option(GLFW_BUILD_EXAMPLES OFF)
-option(GLFW_BUILD_TESTS OFF)
-option(GLFW_INSTALL OFF)
-
-find_package(glfw3 3.3 REQUIRED)
-find_package(glm REQUIRED)
-
-# Glad 
-add_library(glad 
-    STATIC 
-        ${VENDOR_DIR}/glad/glad.c
-)
-target_include_directories(glad 
-    PUBLIC 
-        ${VENDOR_DIR}
-)
-
-# STB
-add_library(loaders
-    STATIC
-        ${VENDOR_DIR}/loaders/tinyobj.cpp
-        ${VENDOR_DIR}/loaders/stb_image.cpp
-)
-target_include_directories(loaders 
-    PUBLIC 
-        ${VENDOR_DIR}
-)
-
-# somaesque
-add_executable(${PROJECT_NAME})
-target_sources(${PROJECT_NAME}
-    PRIVATE
-        ${SRC_DIR}/main.cpp 
-        ${SRC_DIR}/VoxelSpace.cpp
-        ${SRC_DIR}/VoxelSpace.h
-        ${SRC_DIR}/SomaSolve.cpp
-        ${SRC_DIR}/SomaSolve.h
-        ${SRC_DIR}/gfx/Texture.h
-        ${SRC_DIR}/gfx/Texture.cpp
-        ${SRC_DIR}/gfx/Mesh.h
-        ${SRC_DIR}/gfx/Mesh.cpp
-        ${SRC_DIR}/gfx/Shader.h
-        ${SRC_DIR}/gfx/Shader.cpp
-        ${SRC_DIR}/gfx/Color.h
-        ${SRC_DIR}/gfx/Color.cpp
-        ${SRC_DIR}/gfx/geometry.h
-        ${SRC_DIR}/gfx/geometry.cpp
-)
-target_link_libraries(${PROJECT_NAME} 
-    PRIVATE
-        glfw 
-        GL 
-        X11 
-        pthread 
-        Xrandr 
-        dl 
-        glm::glm
-        glad
-        loaders
-)
-target_include_directories(somaesque
-    PUBLIC 
-        ${VENDOR_DIR}/KHR
-)
-
-# TESTING
-include(FetchContent)
-FetchContent_Declare(
-  googletest
-  URL https://github.com/google/googletest/archive/03597a01ee50ed33e9dfd640b249b4be3799d395.zip
-)
-FetchContent_MakeAvailable(googletest)
-
-enable_testing()
-add_executable(tests 
-    ${SRC_DIR}/tests.cpp
-    ${SRC_DIR}/VoxelSpace.cpp
-    ${SRC_DIR}/VoxelSpace.h
-)
-
-target_link_libraries(tests
-    GTest::gtest_main
-)
-
-include(GoogleTest)
-gtest_discover_tests(tests)
diff --git a/build b/build
new file mode 100644
index 0000000..703d23a
--- /dev/null
+++ b/build
@@ -0,0 +1,3 @@
+#!/bin/bash
+# Debug build of ./app.cpp into ./target/app. Libraries follow the source file because the linker resolves symbols left to right.
+mkdir -p ./target && g++ -I ./ -g3 -DOS_LINUX=1 -DENABLE_ASSERT=1 ./app.cpp -o ./target/app -lglfw -lGLU -lGL -lXrandr -lXxf86vm -lXi -lXinerama -lX11 -lrt -ldl
diff --git a/build.bat b/build.bat
new file mode 100644
index 0000000..4d507fb
--- /dev/null
+++ b/build.bat
@@ -0,0 +1,27 @@
+@echo off
+rem Builds ..\app.cpp into .\target\app.exe with MSVC (cl); exits non-zero when the compile or link fails.
+if NOT EXIST .\target mkdir .\target
+
+set commonLinkerFlags=-opt:ref
+set commonCompilerFlags=^
+    -MT                                                             %= Make sure the C runtime library is statically linked =%^
+    -Gm-                                                            %= Turns off incremental building =%^
+    -nologo                                                         %= No one cares you made the compiler Microsoft =%^
+    -Oi                                                             %= Always use intrinsics  =%^
+    -EHa-                                                           %= Disable exception handling =%^
+    -GR-                                                            %= Never use runtime type info from C++ =%^
+    -WX -W4 -wd4201 -wd4100 -wd4189 -wd4505                         %= Compiler warnings, -WX warnings as errors, -W4 warning level 4, -wdXXXX disable warning XXXX =%^
+    -DAPP_DEBUG=0 -DENABLE_ASSERT=1 -DOS_WINDOWS=1                  %= Custom #defines =%^
+    -D_CRT_SECURE_NO_WARNINGS=1^
+    -FC                                                             %= Full path of source code file in diagnostics =%^
+    -Zi                                                             %= Generate debugger info =%
+
+pushd .\target
+cl %commonCompilerFlags% -Fe:.\app.exe ..\app.cpp /link -incremental:no %commonLinkerFlags% || goto error
+popd
+exit /b 0
+
+rem Reached only when cl fails; we are still inside .\target, so restore the caller's directory before exiting.
+:error
+echo Failed with error #%errorlevel%.
+popd & exit /b %errorlevel%
diff --git a/build.zig b/build.zig
deleted file mode 100644
index ba9db36..0000000
--- a/build.zig
+++ /dev/null
@@ -1,55 +0,0 @@
-const std = @import("std");
-const zmath = @import("lib/zmath/build.zig");
-
-pub fn build(b: *std.Build) void {
-    // Standard target options allows the person running `zig build` to choose
-    // what target to build for. Here we do not override the defaults, which
-    // means any target is allowed, and the default is native. Other options
-    // for restricting supported target set are available.
-
-    const target = b.standardTargetOptions(.{});
-
-    // Standard release options allow the person running `zig build` to select
-    // between Debug, ReleaseSafe, ReleaseFast, and ReleaseSmall.
-    const mode = b.standardOptimizeOption(.{});
-
-    const exe = b.addExecutable(.{
-        .name = "somaesque-native-zig",
-        .root_source_file = .{ .path = "src/main.zig" },
-        .target = target,
-        .optimize = mode,
-    });
-    exe.addIncludePath("/usr/local/include");
-
-    exe.linkLibC();
-    exe.linkSystemLibrary("glfw3");
-    exe.linkSystemLibrary("glm");
-    exe.linkSystemLibrary("GL");
-    exe.addIncludePath("lib/c");
-
-    exe.addCSourceFile("lib/c/glad/glad.c", &[_][]const u8{"-std=c11"});
-
-    exe.install();
-
-    // zmath
-    const zmath_pkg = zmath.package(b, target, mode, .{
-        .options = .{ .enable_cross_platform_determinism = true },
-    });
-    zmath_pkg.link(exe);
-
-    const run_cmd = exe.run();
-    run_cmd.step.dependOn(b.getInstallStep());
-    if (b.args) |args| {
-        run_cmd.addArgs(args);
-    }
-
-    const run_step = b.step("run", "Run the app");
-    run_step.dependOn(&run_cmd.step);
-
-    //const exe_tests = b.addTest("src/main.zig");
-    //exe_tests.setTarget(target);
-    //exe_tests.setBuildMode(mode);
-
-    //const test_step = b.step("test", "Run unit tests");
-    //test_step.dependOn(&exe_tests.step);
-}
diff --git a/lib/zmath/README.md b/lib/zmath/README.md
deleted file mode 100644
index c11ef1f..0000000
--- a/lib/zmath/README.md
+++ /dev/null
@@ -1,138 +0,0 @@
-# zmath v0.9.6 - SIMD math library for game developers
-
-Tested on x86_64 and AArch64.
-
-Provides ~140 optimized routines and ~70 extensive tests.
-
-Can be used with any graphics API.
-
-Documentation can be found [here](https://github.com/michal-z/zig-gamedev/blob/main/libs/zmath/src/zmath.zig).
-
-Benchamrks can be found [here](https://github.com/michal-z/zig-gamedev/blob/main/libs/zmath/src/benchmark.zig).
-
-An intro article can be found [here](https://zig.news/michalz/fast-multi-platform-simd-math-library-in-zig-2adn).
-
-## Getting started
-
-Copy `zmath` folder to a `libs` subdirectory of the root of your project.
-
-Then in your `build.zig` add:
-
-```zig
-const std = @import("std");
-const zmath = @import("libs/zmath/build.zig");
-
-pub fn build(b: *std.Build) void {
-    ...
-    const optimize = b.standardOptimizeOption(.{});
-    const target = b.standardTargetOptions(.{});
-
-    zmath_pkg = zmath.package(b, target, optimize, .{
-        .options = .{ .enable_cross_platform_determinism = true },
-    });
-
-    zmath_pkg.link(exe);
-}
-```
-
-Now in your code you may import and use zmath:
-
-```zig
-const zm = @import("zmath");
-
-pub fn main() !void {
-    //
-    // OpenGL/Vulkan example
-    //
-    const object_to_world = zm.rotationY(..);
-    const world_to_view = zm.lookAtRh(
-        zm.f32x4(3.0, 3.0, 3.0, 1.0), // eye position
-        zm.f32x4(0.0, 0.0, 0.0, 1.0), // focus point
-        zm.f32x4(0.0, 1.0, 0.0, 0.0), // up direction ('w' coord is zero because this is a vector not a point)
-    );
-    // `perspectiveFovRhGl` produces Z values in [-1.0, 1.0] range (Vulkan app should use `perspectiveFovRh`)
-    const view_to_clip = zm.perspectiveFovRhGl(0.25 * math.pi, aspect_ratio, 0.1, 20.0);
-
-    const object_to_view = zm.mul(object_to_world, world_to_view);
-    const object_to_clip = zm.mul(object_to_view, view_to_clip);
-
-    // Transposition is needed because GLSL uses column-major matrices by default
-    gl.uniformMatrix4fv(0, 1, gl.TRUE, zm.arrNPtr(&object_to_clip));
-    
-    // In GLSL: gl_Position = vec4(in_position, 1.0) * object_to_clip;
-    
-    //
-    // DirectX example
-    //
-    const object_to_world = zm.rotationY(..);
-    const world_to_view = zm.lookAtLh(
-        zm.f32x4(3.0, 3.0, -3.0, 1.0), // eye position
-        zm.f32x4(0.0, 0.0, 0.0, 1.0), // focus point
-        zm.f32x4(0.0, 1.0, 0.0, 0.0), // up direction ('w' coord is zero because this is a vector not a point)
-    );
-    const view_to_clip = zm.perspectiveFovLh(0.25 * math.pi, aspect_ratio, 0.1, 20.0);
-
-    const object_to_view = zm.mul(object_to_world, world_to_view);
-    const object_to_clip = zm.mul(object_to_view, view_to_clip);
-    
-    // Transposition is needed because HLSL uses column-major matrices by default
-    const mem = allocateUploadMemory(...);
-    zm.storeMat(mem, zm.transpose(object_to_clip));
-    
-    // In HLSL: out_position_sv = mul(float4(in_position, 1.0), object_to_clip);
-    
-    //
-    // 'WASD' camera movement example
-    //
-    {
-        const speed = zm.f32x4s(10.0);
-        const delta_time = zm.f32x4s(demo.frame_stats.delta_time);
-        const transform = zm.mul(zm.rotationX(demo.camera.pitch), zm.rotationY(demo.camera.yaw));
-        var forward = zm.normalize3(zm.mul(zm.f32x4(0.0, 0.0, 1.0, 0.0), transform));
-
-        zm.storeArr3(&demo.camera.forward, forward);
-
-        const right = speed * delta_time * zm.normalize3(zm.cross3(zm.f32x4(0.0, 1.0, 0.0, 0.0), forward));
-        forward = speed * delta_time * forward;
-
-        var cam_pos = zm.loadArr3(demo.camera.position);
-
-        if (keyDown('W')) {
-            cam_pos += forward;
-        } else if (keyDown('S')) {
-            cam_pos -= forward;
-        }
-        if (keyDown('D')) {
-            cam_pos += right;
-        } else if (keyDown('A')) {
-            cam_pos -= right;
-        }
-
-        zm.storeArr3(&demo.camera.position, cam_pos);
-    }
-   
-    //
-    // SIMD wave equation solver example (works with vector width 4, 8 and 16)
-    // 'T' can be F32x4, F32x8 or F32x16
-    //
-    var z_index: i32 = 0;
-    while (z_index < grid_size) : (z_index += 1) {
-        const z = scale * @intToFloat(f32, z_index - grid_size / 2);
-        const vz = zm.splat(T, z);
-
-        var x_index: i32 = 0;
-        while (x_index < grid_size) : (x_index += zm.veclen(T)) {
-            const x = scale * @intToFloat(f32, x_index - grid_size / 2);
-            const vx = zm.splat(T, x) + voffset * zm.splat(T, scale);
-
-            const d = zm.sqrt(vx * vx + vz * vz);
-            const vy = zm.sin(d - vtime);
-
-            const index = @intCast(usize, x_index + z_index * grid_size);
-            zm.store(xslice[index..], vx, 0);
-            zm.store(yslice[index..], vy, 0);
-            zm.store(zslice[index..], vz, 0);
-        }
-    }
-}
-```
diff --git a/lib/zmath/build.zig b/lib/zmath/build.zig
deleted file mode 100644
index ce21bc1..0000000
--- a/lib/zmath/build.zig
+++ /dev/null
@@ -1,97 +0,0 @@
-const std = @import("std");
-
-pub const Options = struct {
-    enable_cross_platform_determinism: bool = true,
-};
-
-pub const Package = struct {
-    options: Options,
-    zmath: *std.Build.Module,
-    zmath_options: *std.Build.Module,
-
-    pub fn link(pkg: Package, exe: *std.Build.CompileStep) void {
-        exe.addModule("zmath", pkg.zmath);
-        exe.addModule("zmath_options", pkg.zmath_options);
-    }
-};
-
-pub fn package(
-    b: *std.Build,
-    _: std.zig.CrossTarget,
-    _: std.builtin.Mode,
-    args: struct {
-        options: Options = .{},
-    },
-) Package {
-    const step = b.addOptions();
-    step.addOption(
-        bool,
-        "enable_cross_platform_determinism",
-        args.options.enable_cross_platform_determinism,
-    );
-
-    const zmath_options = step.createModule();
-
-    const zmath = b.createModule(.{
-        .source_file = .{ .path = thisDir() ++ "/src/main.zig" },
-        .dependencies = &.{
-            .{ .name = "zmath_options", .module = zmath_options },
-        },
-    });
-
-    return .{
-        .options = args.options,
-        .zmath = zmath,
-        .zmath_options = zmath_options,
-    };
-}
-
-pub fn build(b: *std.Build) void {
-    const optimize = b.standardOptimizeOption(.{});
-    const target = b.standardTargetOptions(.{});
-
-    const test_step = b.step("test", "Run zmath tests");
-    test_step.dependOn(runTests(b, optimize, target));
-
-    const benchmark_step = b.step("benchmark", "Run zmath benchmarks");
-    benchmark_step.dependOn(runBenchmarks(b, target));
-}
-
-pub fn runTests(
-    b: *std.Build,
-    optimize: std.builtin.Mode,
-    target: std.zig.CrossTarget,
-) *std.Build.Step {
-    const tests = b.addTest(.{
-        .name = "zmath-tests",
-        .root_source_file = .{ .path = thisDir() ++ "/src/main.zig" },
-        .target = target,
-        .optimize = optimize,
-    });
-
-    const zmath_pkg = package(b, target, optimize, .{});
-    tests.addModule("zmath_options", zmath_pkg.zmath_options);
-
-    return &tests.run().step;
-}
-
-pub fn runBenchmarks(
-    b: *std.Build,
-    target: std.zig.CrossTarget,
-) *std.Build.Step {
-    const exe = b.addExecutable(.{
-        .name = "zmath-benchmarks",
-        .root_source_file = .{ .path = thisDir() ++ "/src/benchmark.zig" },
-        .target = target,
-        .optimize = .ReleaseFast,
-    });
-
-    const zmath_pkg = package(b, target, .ReleaseFast, .{});
-    exe.addModule("zmath", zmath_pkg.zmath);
-
-    return &exe.run().step;
-}
-
-inline fn thisDir() []const u8 {
-    return comptime std.fs.path.dirname(@src().file) orelse ".";
-}
diff --git a/lib/zmath/src/benchmark.zig b/lib/zmath/src/benchmark.zig
deleted file mode 100644
index 136e29d..0000000
--- a/lib/zmath/src/benchmark.zig
+++ /dev/null
@@ -1,469 +0,0 @@
-// -------------------------------------------------------------------------------------------------
-// zmath - benchmarks
-// -------------------------------------------------------------------------------------------------
-// 'zig build benchmark' in the root project directory will build and run 'ReleaseFast' configuration.
-//
-// -------------------------------------------------------------------------------------------------
-// 'AMD Ryzen 9 3950X 16-Core Processor', Windows 11, Zig 0.10.0-dev.2620+0e9458a3f
-// -------------------------------------------------------------------------------------------------
-//                matrix mul benchmark (AOS) - scalar version: 1.5880s, zmath version: 1.0642s
-//       cross3, scale, bias benchmark (AOS) - scalar version: 0.9318s, zmath version: 0.6888s
-// cross3, dot3, scale, bias benchmark (AOS) - scalar version: 1.2258s, zmath version: 1.1095s
-//            quaternion mul benchmark (AOS) - scalar version: 1.4123s, zmath version: 0.6958s
-//                      wave benchmark (SOA) - scalar version: 4.8165s, zmath version: 0.7338s
-//
-// -------------------------------------------------------------------------------------------------
-// 'AMD Ryzen 7 5800X 8-Core Processer', Linux 5.17.14, Zig 0.10.0-dev.2624+d506275a0
-// -------------------------------------------------------------------------------------------------
-//                matrix mul benchmark (AOS) - scalar version: 1.3672s, zmath version: 0.8617s
-//       cross3, scale, bias benchmark (AOS) - scalar version: 0.6586s, zmath version: 0.4803s
-// cross3, dot3, scale, bias benchmark (AOS) - scalar version: 1.0620s, zmath version: 0.8942s
-//            quaternion mul benchmark (AOS) - scalar version: 1.1324s, zmath version: 0.6064s
-//                      wave benchmark (SOA) - scalar version: 3.6598s, zmath version: 0.4231s
-//
-// -------------------------------------------------------------------------------------------------
-// 'Apple M1 Max', macOS Version 12.4, Zig 0.10.0-dev.2657+74442f350
-// -------------------------------------------------------------------------------------------------
-//                matrix mul benchmark (AOS) - scalar version: 1.0297s, zmath version: 1.0538s
-//       cross3, scale, bias benchmark (AOS) - scalar version: 0.6294s, zmath version: 0.6532s
-// cross3, dot3, scale, bias benchmark (AOS) - scalar version: 0.9807s, zmath version: 1.0988s
-//            quaternion mul benchmark (AOS) - scalar version: 1.5413s, zmath version: 0.7800s
-//                      wave benchmark (SOA) - scalar version: 3.4220s, zmath version: 1.0255s
-//
-// -------------------------------------------------------------------------------------------------
-// '11th Gen Intel(R) Core(TM) i7-11800H @ 2.30GHz', Windows 11, Zig 0.10.0-dev.2620+0e9458a3f
-// -------------------------------------------------------------------------------------------------
-//                matrix mul benchmark (AOS) - scalar version: 2.2308s, zmath version: 0.9376s
-//       cross3, scale, bias benchmark (AOS) - scalar version: 1.0821s, zmath version: 0.5110s
-// cross3, dot3, scale, bias benchmark (AOS) - scalar version: 1.6580s, zmath version: 0.9167s
-//            quaternion mul benchmark (AOS) - scalar version: 2.0139s, zmath version: 0.5856s
-//                      wave benchmark (SOA) - scalar version: 3.7832s, zmath version: 0.3642s
-//
-// -------------------------------------------------------------------------------------------------
-
-pub fn main() !void {
-    var gpa = std.heap.GeneralPurposeAllocator(.{}){};
-    defer _ = gpa.deinit();
-    const allocator = gpa.allocator();
-
-    // m = mul(ma, mb); data set fits in L1 cache; AOS data layout.
-    try mat4MulBenchmark(allocator, 100_000);
-
-    // v = 0.01 * cross3(va, vb) + vec3(1.0); data set fits in L1 cache; AOS data layout.
-    try cross3ScaleBiasBenchmark(allocator, 10_000);
-
-    // v = dot3(va, vb) * (0.1 * cross3(va, vb) + vec3(1.0)); data set fits in L1 cache; AOS data layout.
-    try cross3Dot3ScaleBiasBenchmark(allocator, 10_000);
-
-    // q = qmul(qa, qb); data set fits in L1 cache; AOS data layout.
-    try quatBenchmark(allocator, 10_000);
-
-    // d = sqrt(x * x + z * z); y = sin(d - t); SOA layout.
-    try waveBenchmark(allocator, 1_000);
-}
-
-const std = @import("std");
-const time = std.time;
-const Timer = time.Timer;
-const zm = @import("zmath");
-
-var prng = std.rand.DefaultPrng.init(0);
-const random = prng.random();
-
-noinline fn mat4MulBenchmark(allocator: std.mem.Allocator, comptime count: comptime_int) !void {
-    std.debug.print("\n", .{});
-    std.debug.print("{s:>42} - ", .{"matrix mul benchmark (AOS)"});
-
-    var data0 = std.ArrayList([16]f32).init(allocator);
-    defer data0.deinit();
-    var data1 = std.ArrayList([16]f32).init(allocator);
-    defer data1.deinit();
-
-    var i: usize = 0;
-    while (i < 64) : (i += 1) {
-        try data0.append([16]f32{
-            random.float(f32), random.float(f32), random.float(f32), random.float(f32),
-            random.float(f32), random.float(f32), random.float(f32), random.float(f32),
-            random.float(f32), random.float(f32), random.float(f32), random.float(f32),
-            random.float(f32), random.float(f32), random.float(f32), random.float(f32),
-        });
-        try data1.append([16]f32{
-            random.float(f32), random.float(f32), random.float(f32), random.float(f32),
-            random.float(f32), random.float(f32), random.float(f32), random.float(f32),
-            random.float(f32), random.float(f32), random.float(f32), random.float(f32),
-            random.float(f32), random.float(f32), random.float(f32), random.float(f32),
-        });
-    }
-
-    // Warmup, fills L1 cache.
-    i = 0;
-    while (i < 100) : (i += 1) {
-        for (data1.items) |b| {
-            for (data0.items) |a| {
-                const ma = zm.loadMat(a[0..]);
-                const mb = zm.loadMat(b[0..]);
-                const r = zm.mul(ma, mb);
-                std.mem.doNotOptimizeAway(&r);
-            }
-        }
-    }
-
-    {
-        i = 0;
-        var timer = try Timer.start();
-        const start = timer.lap();
-        while (i < count) : (i += 1) {
-            for (data1.items) |b| {
-                for (data0.items) |a| {
-                    const r = [16]f32{
-                        a[0] * b[0] + a[1] * b[4] + a[2] * b[8] + a[3] * b[12],
-                        a[0] * b[1] + a[1] * b[5] + a[2] * b[9] + a[3] * b[13],
-                        a[0] * b[2] + a[1] * b[6] + a[2] * b[10] + a[3] * b[14],
-                        a[0] * b[3] + a[1] * b[7] + a[2] * b[11] + a[3] * b[15],
-                        a[4] * b[0] + a[5] * b[4] + a[6] * b[8] + a[7] * b[12],
-                        a[4] * b[1] + a[5] * b[5] + a[6] * b[9] + a[7] * b[13],
-                        a[4] * b[2] + a[5] * b[6] + a[6] * b[10] + a[7] * b[14],
-                        a[4] * b[3] + a[5] * b[7] + a[6] * b[11] + a[7] * b[15],
-                        a[8] * b[0] + a[9] * b[4] + a[10] * b[8] + a[11] * b[12],
-                        a[8] * b[1] + a[9] * b[5] + a[10] * b[9] + a[11] * b[13],
-                        a[8] * b[2] + a[9] * b[6] + a[10] * b[10] + a[11] * b[14],
-                        a[8] * b[3] + a[9] * b[7] + a[10] * b[11] + a[11] * b[15],
-                        a[12] * b[0] + a[13] * b[4] + a[14] * b[8] + a[15] * b[12],
-                        a[12] * b[1] + a[13] * b[5] + a[14] * b[9] + a[15] * b[13],
-                        a[12] * b[2] + a[13] * b[6] + a[14] * b[10] + a[15] * b[14],
-                        a[12] * b[3] + a[13] * b[7] + a[14] * b[11] + a[15] * b[15],
-                    };
-                    std.mem.doNotOptimizeAway(&r);
-                }
-            }
-        }
-        const end = timer.read();
-        const elapsed_s = @intToFloat(f64, end - start) / time.ns_per_s;
-
-        std.debug.print("scalar version: {d:.4}s, ", .{elapsed_s});
-    }
-
-    {
-        i = 0;
-        var timer = try Timer.start();
-        const start = timer.lap();
-        while (i < count) : (i += 1) {
-            for (data1.items) |b| {
-                for (data0.items) |a| {
-                    const ma = zm.loadMat(a[0..]);
-                    const mb = zm.loadMat(b[0..]);
-                    const r = zm.mul(ma, mb);
-                    std.mem.doNotOptimizeAway(&r);
-                }
-            }
-        }
-        const end = timer.read();
-        const elapsed_s = @intToFloat(f64, end - start) / time.ns_per_s;
-
-        std.debug.print("zmath version: {d:.4}s\n", .{elapsed_s});
-    }
-}
-
-noinline fn cross3ScaleBiasBenchmark(allocator: std.mem.Allocator, comptime count: comptime_int) !void {
-    std.debug.print("{s:>42} - ", .{"cross3, scale, bias benchmark (AOS)"});
-
-    var data0 = std.ArrayList([3]f32).init(allocator);
-    defer data0.deinit();
-    var data1 = std.ArrayList([3]f32).init(allocator);
-    defer data1.deinit();
-
-    var i: usize = 0;
-    while (i < 256) : (i += 1) {
-        try data0.append([3]f32{ random.float(f32), random.float(f32), random.float(f32) });
-        try data1.append([3]f32{ random.float(f32), random.float(f32), random.float(f32) });
-    }
-
-    // Warmup, fills L1 cache.
-    i = 0;
-    while (i < 100) : (i += 1) {
-        for (data1.items) |b| {
-            for (data0.items) |a| {
-                const va = zm.loadArr3(a);
-                const vb = zm.loadArr3(b);
-                const cp = zm.f32x4s(0.01) * zm.cross3(va, vb) + zm.f32x4s(1.0);
-                std.mem.doNotOptimizeAway(&cp);
-            }
-        }
-    }
-
-    {
-        i = 0;
-        var timer = try Timer.start();
-        const start = timer.lap();
-        while (i < count) : (i += 1) {
-            for (data1.items) |b| {
-                for (data0.items) |a| {
-                    const r = [3]f32{
-                        0.01 * (a[1] * b[2] - a[2] * b[1]) + 1.0,
-                        0.01 * (a[2] * b[0] - a[0] * b[2]) + 1.0,
-                        0.01 * (a[0] * b[1] - a[1] * b[0]) + 1.0,
-                    };
-                    std.mem.doNotOptimizeAway(&r);
-                }
-            }
-        }
-        const end = timer.read();
-        const elapsed_s = @intToFloat(f64, end - start) / time.ns_per_s;
-
-        std.debug.print("scalar version: {d:.4}s, ", .{elapsed_s});
-    }
-
-    {
-        i = 0;
-        var timer = try Timer.start();
-        const start = timer.lap();
-        while (i < count) : (i += 1) {
-            for (data1.items) |b| {
-                for (data0.items) |a| {
-                    const va = zm.loadArr3(a);
-                    const vb = zm.loadArr3(b);
-                    const cp = zm.f32x4s(0.01) * zm.cross3(va, vb) + zm.f32x4s(1.0);
-                    std.mem.doNotOptimizeAway(&cp);
-                }
-            }
-        }
-        const end = timer.read();
-        const elapsed_s = @intToFloat(f64, end - start) / time.ns_per_s;
-
-        std.debug.print("zmath version: {d:.4}s\n", .{elapsed_s});
-    }
-}
-
-noinline fn cross3Dot3ScaleBiasBenchmark(allocator: std.mem.Allocator, comptime count: comptime_int) !void {
-    std.debug.print("{s:>42} - ", .{"cross3, dot3, scale, bias benchmark (AOS)"});
-
-    var data0 = std.ArrayList([3]f32).init(allocator);
-    defer data0.deinit();
-    var data1 = std.ArrayList([3]f32).init(allocator);
-    defer data1.deinit();
-
-    var i: usize = 0;
-    while (i < 256) : (i += 1) {
-        try data0.append([3]f32{ random.float(f32), random.float(f32), random.float(f32) });
-        try data1.append([3]f32{ random.float(f32), random.float(f32), random.float(f32) });
-    }
-
-    // Warmup, fills L1 cache.
-    i = 0;
-    while (i < 100) : (i += 1) {
-        for (data1.items) |b| {
-            for (data0.items) |a| {
-                const va = zm.loadArr3(a);
-                const vb = zm.loadArr3(b);
-                const r = (zm.dot3(va, vb) * (zm.f32x4s(0.1) * zm.cross3(va, vb) + zm.f32x4s(1.0)))[0];
-                std.mem.doNotOptimizeAway(&r);
-            }
-        }
-    }
-
-    {
-        i = 0;
-        var timer = try Timer.start();
-        const start = timer.lap();
-        while (i < count) : (i += 1) {
-            for (data1.items) |b| {
-                for (data0.items) |a| {
-                    const d = a[0] * b[0] + a[1] * b[1] + a[2] * b[2];
-                    const r = [3]f32{
-                        d * (0.1 * (a[1] * b[2] - a[2] * b[1]) + 1.0),
-                        d * (0.1 * (a[2] * b[0] - a[0] * b[2]) + 1.0),
-                        d * (0.1 * (a[0] * b[1] - a[1] * b[0]) + 1.0),
-                    };
-                    std.mem.doNotOptimizeAway(&r);
-                }
-            }
-        }
-        const end = timer.read();
-        const elapsed_s = @intToFloat(f64, end - start) / time.ns_per_s;
-
-        std.debug.print("scalar version: {d:.4}s, ", .{elapsed_s});
-    }
-
-    {
-        i = 0;
-        var timer = try Timer.start();
-        const start = timer.lap();
-        while (i < count) : (i += 1) {
-            for (data1.items) |b| {
-                for (data0.items) |a| {
-                    const va = zm.loadArr3(a);
-                    const vb = zm.loadArr3(b);
-                    const r = zm.dot3(va, vb) * (zm.f32x4s(0.1) * zm.cross3(va, vb) + zm.f32x4s(1.0));
-                    std.mem.doNotOptimizeAway(&r);
-                }
-            }
-        }
-        const end = timer.read();
-        const elapsed_s = @intToFloat(f64, end - start) / time.ns_per_s;
-
-        std.debug.print("zmath version: {d:.4}s\n", .{elapsed_s});
-    }
-}
-
-noinline fn quatBenchmark(allocator: std.mem.Allocator, comptime count: comptime_int) !void {
-    std.debug.print("{s:>42} - ", .{"quaternion mul benchmark (AOS)"});
-
-    var data0 = std.ArrayList([4]f32).init(allocator);
-    defer data0.deinit();
-    var data1 = std.ArrayList([4]f32).init(allocator);
-    defer data1.deinit();
-
-    var i: usize = 0;
-    while (i < 256) : (i += 1) {
-        try data0.append([4]f32{ random.float(f32), random.float(f32), random.float(f32), random.float(f32) });
-        try data1.append([4]f32{ random.float(f32), random.float(f32), random.float(f32), random.float(f32) });
-    }
-
-    // Warmup, fills L1 cache.
-    i = 0;
-    while (i < 100) : (i += 1) {
-        for (data1.items) |b| {
-            for (data0.items) |a| {
-                const va = zm.loadArr4(a);
-                const vb = zm.loadArr4(b);
-                const r = zm.qmul(va, vb);
-                std.mem.doNotOptimizeAway(&r);
-            }
-        }
-    }
-
-    {
-        i = 0;
-        var timer = try Timer.start();
-        const start = timer.lap();
-        while (i < count) : (i += 1) {
-            for (data1.items) |b| {
-                for (data0.items) |a| {
-                    const r = [4]f32{
-                        (b[3] * a[0]) + (b[0] * a[3]) + (b[1] * a[2]) - (b[2] * a[1]),
-                        (b[3] * a[1]) - (b[0] * a[2]) + (b[1] * a[3]) + (b[2] * a[0]),
-                        (b[3] * a[2]) + (b[0] * a[1]) - (b[1] * a[0]) + (b[2] * a[3]),
-                        (b[3] * a[3]) - (b[0] * a[0]) - (b[1] * a[1]) - (b[2] * a[2]),
-                    };
-                    std.mem.doNotOptimizeAway(&r);
-                }
-            }
-        }
-        const end = timer.read();
-        const elapsed_s = @intToFloat(f64, end - start) / time.ns_per_s;
-
-        std.debug.print("scalar version: {d:.4}s, ", .{elapsed_s});
-    }
-
-    {
-        i = 0;
-        var timer = try Timer.start();
-        const start = timer.lap();
-        while (i < count) : (i += 1) {
-            for (data1.items) |b| {
-                for (data0.items) |a| {
-                    const va = zm.loadArr4(a);
-                    const vb = zm.loadArr4(b);
-                    const r = zm.qmul(va, vb);
-                    std.mem.doNotOptimizeAway(&r);
-                }
-            }
-        }
-        const end = timer.read();
-        const elapsed_s = @intToFloat(f64, end - start) / time.ns_per_s;
-
-        std.debug.print("zmath version: {d:.4}s\n", .{elapsed_s});
-    }
-}
-
-noinline fn waveBenchmark(allocator: std.mem.Allocator, comptime count: comptime_int) !void {
-    _ = allocator;
-    std.debug.print("{s:>42} - ", .{"wave benchmark (SOA)"});
-
-    const grid_size = 1024;
-    {
-        var t: f32 = 0.0;
-
-        const scale: f32 = 0.05;
-
-        var timer = try Timer.start();
-        const start = timer.lap();
-
-        var iter: usize = 0;
-        while (iter < count) : (iter += 1) {
-            var z_index: i32 = 0;
-            while (z_index < grid_size) : (z_index += 1) {
-                const z = scale * @intToFloat(f32, z_index - grid_size / 2);
-
-                var x_index: i32 = 0;
-                while (x_index < grid_size) : (x_index += 4) {
-                    const x0 = scale * @intToFloat(f32, x_index + 0 - grid_size / 2);
-                    const x1 = scale * @intToFloat(f32, x_index + 1 - grid_size / 2);
-                    const x2 = scale * @intToFloat(f32, x_index + 2 - grid_size / 2);
-                    const x3 = scale * @intToFloat(f32, x_index + 3 - grid_size / 2);
-
-                    const d0 = zm.sqrt(x0 * x0 + z * z);
-                    const d1 = zm.sqrt(x1 * x1 + z * z);
-                    const d2 = zm.sqrt(x2 * x2 + z * z);
-                    const d3 = zm.sqrt(x3 * x3 + z * z);
-
-                    const y0 = zm.sin(d0 - t);
-                    const y1 = zm.sin(d1 - t);
-                    const y2 = zm.sin(d2 - t);
-                    const y3 = zm.sin(d3 - t);
-
-                    std.mem.doNotOptimizeAway(&y0);
-                    std.mem.doNotOptimizeAway(&y1);
-                    std.mem.doNotOptimizeAway(&y2);
-                    std.mem.doNotOptimizeAway(&y3);
-                }
-            }
-            t += 0.001;
-        }
-        const end = timer.read();
-        const elapsed_s = @intToFloat(f64, end - start) / time.ns_per_s;
-
-        std.debug.print("scalar version: {d:.4}s, ", .{elapsed_s});
-    }
-
-    {
-        const T = zm.F32x16;
-
-        const static = struct {
-            const offsets = [16]f32{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
-        };
-        const voffset = zm.load(static.offsets[0..], T, 0);
-        var vt = zm.splat(T, 0.0);
-
-        const scale: f32 = 0.05;
-
-        var timer = try Timer.start();
-        const start = timer.lap();
-
-        var iter: usize = 0;
-        while (iter < count) : (iter += 1) {
-            var z_index: i32 = 0;
-            while (z_index < grid_size) : (z_index += 1) {
-                const z = scale * @intToFloat(f32, z_index - grid_size / 2);
-                const vz = zm.splat(T, z);
-
-                var x_index: i32 = 0;
-                while (x_index < grid_size) : (x_index += zm.veclen(T)) {
-                    const x = scale * @intToFloat(f32, x_index - grid_size / 2);
-                    const vx = zm.splat(T, x) + voffset * zm.splat(T, scale);
-
-                    const d = zm.sqrt(vx * vx + vz * vz);
-
-                    const vy = zm.sin(d - vt);
-
-                    std.mem.doNotOptimizeAway(&vy);
-                }
-            }
-            vt += zm.splat(T, 0.001);
-        }
-        const end = timer.read();
-        const elapsed_s = @intToFloat(f64, end - start) / time.ns_per_s;
-
-        std.debug.print("zmath version: {d:.4}s\n", .{elapsed_s});
-    }
-}
diff --git a/lib/zmath/src/main.zig b/lib/zmath/src/main.zig
deleted file mode 100644
index 5834745..0000000
--- a/lib/zmath/src/main.zig
+++ /dev/null
@@ -1,18 +0,0 @@
-//--------------------------------------------------------------------------------------------------
-//
-// SIMD math library for game developers
-// https://github.com/michal-z/zig-gamedev/tree/main/libs/zmath
-//
-// See zmath.zig for more details.
-// See util.zig for additional functionality.
-//
-//--------------------------------------------------------------------------------------------------
-pub const version = @import("std").SemanticVersion{ .major = 0, .minor = 9, .patch = 6 };
-
-pub usingnamespace @import("zmath.zig");
-pub const util = @import("util.zig");
-
-// ensure transitive closure of test coverage
-comptime {
-    _ = util;
-}
diff --git a/lib/zmath/src/util.zig b/lib/zmath/src/util.zig
deleted file mode 100644
index aa79020..0000000
--- a/lib/zmath/src/util.zig
+++ /dev/null
@@ -1,182 +0,0 @@
-// ==============================================================================
-//
-// Collection of useful functions building on top of, and extending, core zmath.
-// https://github.com/michal-z/zig-gamedev/tree/main/libs/zmath
-//
-// ------------------------------------------------------------------------------
-// 1. Matrix functions
-// ------------------------------------------------------------------------------
-//
-// As an example, in a left handed Y-up system:
-//   getAxisX is equivalent to the right vector
-//   getAxisY is equivalent to the up vector
-//   getAxisZ is equivalent to the forward vector
-//
-// getTranslationVec(m: Mat) Vec
-// getAxisX(m: Mat) Vec
-// getAxisY(m: Mat) Vec
-// getAxisZ(m: Mat) Vec
-//
-// ==============================================================================
-
-const zm = @import("zmath.zig");
-const std = @import("std");
-const math = std.math;
-const expect = std.testing.expect;
-
-pub fn getTranslationVec(m: zm.Mat) zm.Vec {
-    var translation = m[3];
-    translation[3] = 0;
-    return translation;
-}
-
-pub fn getScaleVec(m: zm.Mat) zm.Vec {
-    const scale_x = zm.length3(zm.f32x4(m[0][0], m[1][0], m[2][0], 0))[0];
-    const scale_y = zm.length3(zm.f32x4(m[0][1], m[1][1], m[2][1], 0))[0];
-    const scale_z = zm.length3(zm.f32x4(m[0][2], m[1][2], m[2][2], 0))[0];
-    return zm.f32x4(scale_x, scale_y, scale_z, 0);
-}
-
-pub fn getRotationQuat(_m: zm.Mat) zm.Quat {
-    // Ortho normalize given matrix.
-    const c1 = zm.normalize3(zm.f32x4(_m[0][0], _m[1][0], _m[2][0], 0));
-    const c2 = zm.normalize3(zm.f32x4(_m[0][1], _m[1][1], _m[2][1], 0));
-    const c3 = zm.normalize3(zm.f32x4(_m[0][2], _m[1][2], _m[2][2], 0));
-    var m = _m;
-    m[0][0] = c1[0];
-    m[1][0] = c1[1];
-    m[2][0] = c1[2];
-    m[0][1] = c2[0];
-    m[1][1] = c2[1];
-    m[2][1] = c2[2];
-    m[0][2] = c3[0];
-    m[1][2] = c3[1];
-    m[2][2] = c3[2];
-
-    // Extract rotation
-    return zm.quatFromMat(m);
-}
-
-pub fn getAxisX(m: zm.Mat) zm.Vec {
-    return zm.normalize3(zm.f32x4(m[0][0], m[0][1], m[0][2], 0.0));
-}
-
-pub fn getAxisY(m: zm.Mat) zm.Vec {
-    return zm.normalize3(zm.f32x4(m[1][0], m[1][1], m[1][2], 0.0));
-}
-
-pub fn getAxisZ(m: zm.Mat) zm.Vec {
-    return zm.normalize3(zm.f32x4(m[2][0], m[2][1], m[2][2], 0.0));
-}
-
-test "zmath.util.mat.translation" {
-    // zig fmt: off
-    const mat_data = [18]f32{
-        1.0,
-        2.0, 3.0, 4.0, 5.0,
-        6.0, 7.0, 8.0, 9.0,
-        10.0,11.0, 12.0,13.0,
-        14.0, 15.0, 16.0, 17.0,
-        18.0,
-    };
-    // zig fmt: on
-    const mat = zm.loadMat(mat_data[1..]);
-    const translation = getTranslationVec(mat);
-    try expect(zm.approxEqAbs(translation, zm.f32x4(14.0, 15.0, 16.0, 0.0), 0.0001));
-}
-
-test "zmath.util.mat.scale" {
-    const mat = zm.mul(zm.scaling(3, 4, 5), zm.translation(6, 7, 8));
-    const scale = getScaleVec(mat);
-    try expect(zm.approxEqAbs(scale, zm.f32x4(3.0, 4.0, 5.0, 0.0), 0.0001));
-}
-
-test "zmath.util.mat.rotation" {
-    const rotate_origin = zm.matFromRollPitchYaw(0.1, 1.2, 2.3);
-    const mat = zm.mul(zm.mul(rotate_origin, zm.scaling(3, 4, 5)), zm.translation(6, 7, 8));
-    const rotate_get = getRotationQuat(mat);
-    const v0 = zm.mul(zm.f32x4s(1), rotate_origin);
-    const v1 = zm.mul(zm.f32x4s(1), zm.quatToMat(rotate_get));
-    try expect(zm.approxEqAbs(v0, v1, 0.0001));
-}
-
-test "zmath.util.mat.z_vec" {
-    const degToRad = std.math.degreesToRadians;
-    var identity = zm.identity();
-    var z_vec = getAxisZ(identity);
-    try expect(zm.approxEqAbs(z_vec, zm.f32x4(0.0, 0.0, 1.0, 0), 0.0001));
-    const rot_yaw = zm.rotationY(degToRad(f32, 90));
-    identity = zm.mul(identity, rot_yaw);
-    z_vec = getAxisZ(identity);
-    try expect(zm.approxEqAbs(z_vec, zm.f32x4(1.0, 0.0, 0.0, 0), 0.0001));
-}
-
-test "zmath.util.mat.y_vec" {
-    const degToRad = std.math.degreesToRadians;
-    var identity = zm.identity();
-    var y_vec = getAxisY(identity);
-    try expect(zm.approxEqAbs(y_vec, zm.f32x4(0.0, 1.0, 0.0, 0), 0.01));
-    const rot_yaw = zm.rotationY(degToRad(f32, 90));
-    identity = zm.mul(identity, rot_yaw);
-    y_vec = getAxisY(identity);
-    try expect(zm.approxEqAbs(y_vec, zm.f32x4(0.0, 1.0, 0.0, 0), 0.01));
-    const rot_pitch = zm.rotationX(degToRad(f32, 90));
-    identity = zm.mul(identity, rot_pitch);
-    y_vec = getAxisY(identity);
-    try expect(zm.approxEqAbs(y_vec, zm.f32x4(0.0, 0.0, 1.0, 0), 0.01));
-}
-
-test "zmath.util.mat.right" {
-    const degToRad = std.math.degreesToRadians;
-    var identity = zm.identity();
-    var right = getAxisX(identity);
-    try expect(zm.approxEqAbs(right, zm.f32x4(1.0, 0.0, 0.0, 0), 0.01));
-    const rot_yaw = zm.rotationY(degToRad(f32, 90));
-    identity = zm.mul(identity, rot_yaw);
-    right = getAxisX(identity);
-    try expect(zm.approxEqAbs(right, zm.f32x4(0.0, 0.0, -1.0, 0), 0.01));
-    const rot_pitch = zm.rotationX(degToRad(f32, 90));
-    identity = zm.mul(identity, rot_pitch);
-    right = getAxisX(identity);
-    try expect(zm.approxEqAbs(right, zm.f32x4(0.0, 1.0, 0.0, 0), 0.01));
-}
-
-// ------------------------------------------------------------------------------
-// This software is available under 2 licenses -- choose whichever you prefer.
-// ------------------------------------------------------------------------------
-// ALTERNATIVE A - MIT License
-// Copyright (c) 2022 Michal Ziulek and Contributors
-// Permission is hereby granted, free of charge, to any person obtaining identity copy of
-// this software and associated documentation files (the "Software"), to deal in
-// the Software without restriction, including without limitation the rights to
-// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
-// of the Software, and to permit persons to whom the Software is furnished to do
-// so, subject to the following conditions:
-// The above copyright notice and this permission notice shall be included in all
-// copies or substantial portions of the Software.
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-// SOFTWARE.
-// ------------------------------------------------------------------------------
-// ALTERNATIVE B - Public Domain (www.unlicense.org)
-// This is free and unencumbered software released into the public domain.
-// Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
-// software, either in source code form or as identity compiled binary, for any purpose,
-// commercial or non-commercial, and by any means.
-// In jurisdictions that recognize copyright laws, the author or authors of this
-// software dedicate any and all copyright interest in the software to the public
-// domain. We make this dedication for the benefit of the public at large and to
-// the detriment of our heirs and successors. We intend this dedication to be an
-// overt act of relinquishment in perpetuity of all present and future rights to
-// this software under copyright law.
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-// AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-// ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
-// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-// ------------------------------------------------------------------------------
diff --git a/lib/zmath/src/zmath.zig b/lib/zmath/src/zmath.zig
deleted file mode 100644
index 383ed55..0000000
--- a/lib/zmath/src/zmath.zig
+++ /dev/null
@@ -1,4442 +0,0 @@
-// ==============================================================================
-//
-// SIMD math library for game developers
-// https://github.com/michal-z/zig-gamedev/tree/main/libs/zmath
-//
-// Should work on all OSes supported by Zig. Works on x86_64 and ARM.
-// Provides ~140 optimized routines and ~70 extensive tests.
-// Can be used with any graphics API.
-//
-// zmath uses row-major matrices, row vectors (each row vector is stored in a SIMD register).
-// Handedness is determined by which function version is used (Rh vs. Lh),
-// otherwise the function works with either left-handed or right-handed view coordinates.
-//
-// const va = f32x4(1.0, 2.0, 3.0, 1.0);
-// const vb = f32x4(-1.0, 1.0, -1.0, 1.0);
-// const v0 = va + vb - f32x4(0.0, 1.0, 0.0, 1.0) * f32x4s(3.0);
-// const v1 = cross3(va, vb) + f32x4(1.0, 1.0, 1.0, 1.0);
-// const v2 = va + dot3(va, vb) / v1; // dotN() returns scalar replicated on all vector components
-//
-// const m = rotationX(math.pi * 0.25);
-// const v = f32x4(...);
-// const v0 = mul(v, m); // 'v' treated as a row vector
-// const v1 = mul(m, v); // 'v' treated as a column vector
-// const f = m[row][column];
-//
-// const b = va < vb;
-// if (all(b, 0)) { ... } // '0' means check all vector components; if all are 'true'
-// if (all(b, 3)) { ... } // '3' means check first three vector components; if all first three are 'true'
-// if (any(b, 0)) { ... } // '0' means check all vector components; if any is 'true'
-// if (any(b, 3)) { ... } // '3' means check first three vector components; if any from first three is 'true'
-//
-// var v4 = load(mem[0..], F32x4, 0);
-// var v8 = load(mem[100..], F32x8, 0);
-// var v16 = load(mem[200..], F32x16, 0);
-//
-// var camera_position = [3]f32{ 1.0, 2.0, 3.0 };
-// var cam_pos = loadArr3(camera_position);
-// ...
-// storeArr3(&camera_position, cam_pos);
-//
-// v4 = sin(v4); // SIMDx4
-// v8 = cos(v8); // .x86_64 -> 2 x SIMDx4, .x86_64+avx+fma -> SIMDx8
-// v16 = atan(v16); // .x86_64 -> 4 x SIMDx4, .x86_64+avx+fma -> 2 x SIMDx8, .x86_64+avx512f -> SIMDx16
-//
-// store(mem[0..], v4, 0);
-// store(mem[100..], v8, 0);
-// store(mem[200..], v16, 0);
-//
-// ------------------------------------------------------------------------------
-// 1. Initialization functions
-// ------------------------------------------------------------------------------
-//
-// f32x4(e0: f32, e1: f32, e2: f32, e3: f32) F32x4
-// f32x8(e0: f32, e1: f32, e2: f32, e3: f32, e4: f32, e5: f32, e6: f32, e7: f32) F32x8
-// f32x16(e0: f32, e1: f32, e2: f32, e3: f32, e4: f32, e5: f32, e6: f32, e7: f32,
-//        e8: f32, e9: f32, ea: f32, eb: f32, ec: f32, ed: f32, ee: f32, ef: f32) F32x16
-//
-// f32x4s(e0: f32) F32x4
-// f32x8s(e0: f32) F32x8
-// f32x16s(e0: f32) F32x16
-//
-// boolx4(e0: bool, e1: bool, e2: bool, e3: bool) Boolx4
-// boolx8(e0: bool, e1: bool, e2: bool, e3: bool, e4: bool, e5: bool, e6: bool, e7: bool) Boolx8
-// boolx16(e0: bool, e1: bool, e2: bool, e3: bool, e4: bool, e5: bool, e6: bool, e7: bool,
-//         e8: bool, e9: bool, ea: bool, eb: bool, ec: bool, ed: bool, ee: bool, ef: bool) Boolx16
-//
-// load(mem: []const f32, comptime T: type, comptime len: u32) T
-// store(mem: []f32, v: anytype, comptime len: u32) void
-//
-// loadArr2(arr: [2]f32) F32x4
-// loadArr2zw(arr: [2]f32, z: f32, w: f32) F32x4
-// loadArr3(arr: [3]f32) F32x4
-// loadArr3w(arr: [3]f32, w: f32) F32x4
-// loadArr4(arr: [4]f32) F32x4
-//
-// storeArr2(arr: *[2]f32, v: F32x4) void
-// storeArr3(arr: *[3]f32, v: F32x4) void
-// storeArr4(arr: *[4]f32, v: F32x4) void
-//
-// arr3Ptr(ptr: anytype) *const [3]f32
-// arrNPtr(ptr: anytype) [*]const f32
-//
-// splat(comptime T: type, value: f32) T
-// splatInt(comptime T: type, value: u32) T
-//
-// ------------------------------------------------------------------------------
-// 2. Functions that work on all vector components (F32xN = F32x4 or F32x8 or F32x16)
-// ------------------------------------------------------------------------------
-//
-// all(vb: anytype, comptime len: u32) bool
-// any(vb: anytype, comptime len: u32) bool
-//
-// isNearEqual(v0: F32xN, v1: F32xN, epsilon: F32xN) BoolxN
-// isNan(v: F32xN) BoolxN
-// isInf(v: F32xN) BoolxN
-// isInBounds(v: F32xN, bounds: F32xN) BoolxN
-//
-// andInt(v0: F32xN, v1: F32xN) F32xN
-// andNotInt(v0: F32xN, v1: F32xN) F32xN
-// orInt(v0: F32xN, v1: F32xN) F32xN
-// norInt(v0: F32xN, v1: F32xN) F32xN
-// xorInt(v0: F32xN, v1: F32xN) F32xN
-//
-// minFast(v0: F32xN, v1: F32xN) F32xN
-// maxFast(v0: F32xN, v1: F32xN) F32xN
-// min(v0: F32xN, v1: F32xN) F32xN
-// max(v0: F32xN, v1: F32xN) F32xN
-// round(v: F32xN) F32xN
-// floor(v: F32xN) F32xN
-// trunc(v: F32xN) F32xN
-// ceil(v: F32xN) F32xN
-// clamp(v0: F32xN, v1: F32xN) F32xN
-// clampFast(v0: F32xN, v1: F32xN) F32xN
-// saturate(v: F32xN) F32xN
-// saturateFast(v: F32xN) F32xN
-// lerp(v0: F32xN, v1: F32xN, t: f32) F32xN
-// lerpV(v0: F32xN, v1: F32xN, t: F32xN) F32xN
-// lerpInverse(v0: F32xN, v1: F32xN, t: f32) F32xN
-// lerpInverseV(v0: F32xN, v1: F32xN, t: F32xN) F32xN
-// mapLinear(v: F32xN, min1: f32, max1: f32, min2: f32, max2: f32) F32xN
-// mapLinearV(v: F32xN, min1: F32xN, max1: F32xN, min2: F32xN, max2: F32xN) F32xN
-// sqrt(v: F32xN) F32xN
-// abs(v: F32xN) F32xN
-// mod(v0: F32xN, v1: F32xN) F32xN
-// modAngle(v: F32xN) F32xN
-// mulAdd(v0: F32xN, v1: F32xN, v2: F32xN) F32xN
-// select(mask: BoolxN, v0: F32xN, v1: F32xN)
-// sin(v: F32xN) F32xN
-// cos(v: F32xN) F32xN
-// sincos(v: F32xN) [2]F32xN
-// asin(v: F32xN) F32xN
-// acos(v: F32xN) F32xN
-// atan(v: F32xN) F32xN
-// atan2(vy: F32xN, vx: F32xN) F32xN
-// cmulSoa(re0: F32xN, im0: F32xN, re1: F32xN, im1: F32xN) [2]F32xN
-//
-// ------------------------------------------------------------------------------
-// 3. 2D, 3D, 4D vector functions
-// ------------------------------------------------------------------------------
-//
-// swizzle(v: Vec, c, c, c, c) Vec (comptime c = .x | .y | .z | .w)
-// dot2(v0: Vec, v1: Vec) F32x4
-// dot3(v0: Vec, v1: Vec) F32x4
-// dot4(v0: Vec, v1: Vec) F32x4
-// cross3(v0: Vec, v1: Vec) Vec
-// lengthSq2(v: Vec) F32x4
-// lengthSq3(v: Vec) F32x4
-// lengthSq4(v: Vec) F32x4
-// length2(v: Vec) F32x4
-// length3(v: Vec) F32x4
-// length4(v: Vec) F32x4
-// normalize2(v: Vec) Vec
-// normalize3(v: Vec) Vec
-// normalize4(v: Vec) Vec
-//
-// vecToArr2(v: Vec) [2]f32
-// vecToArr3(v: Vec) [3]f32
-// vecToArr4(v: Vec) [4]f32
-//
-// ------------------------------------------------------------------------------
-// 4. Matrix functions
-// ------------------------------------------------------------------------------
-//
-// identity() Mat
-// mul(m0: Mat, m1: Mat) Mat
-// mul(s: f32, m: Mat) Mat
-// mul(m: Mat, s: f32) Mat
-// mul(v: Vec, m: Mat) Vec
-// mul(m: Mat, v: Vec) Vec
-// transpose(m: Mat) Mat
-// rotationX(angle: f32) Mat
-// rotationY(angle: f32) Mat
-// rotationZ(angle: f32) Mat
-// translation(x: f32, y: f32, z: f32) Mat
-// translationV(v: Vec) Mat
-// scaling(x: f32, y: f32, z: f32) Mat
-// scalingV(v: Vec) Mat
-// lookToLh(eyepos: Vec, eyedir: Vec, updir: Vec) Mat
-// lookAtLh(eyepos: Vec, focuspos: Vec, updir: Vec) Mat
-// lookToRh(eyepos: Vec, eyedir: Vec, updir: Vec) Mat
-// lookAtRh(eyepos: Vec, focuspos: Vec, updir: Vec) Mat
-// perspectiveFovLh(fovy: f32, aspect: f32, near: f32, far: f32) Mat
-// perspectiveFovRh(fovy: f32, aspect: f32, near: f32, far: f32) Mat
-// perspectiveFovLhGl(fovy: f32, aspect: f32, near: f32, far: f32) Mat
-// perspectiveFovRhGl(fovy: f32, aspect: f32, near: f32, far: f32) Mat
-// orthographicLh(w: f32, h: f32, near: f32, far: f32) Mat
-// orthographicRh(w: f32, h: f32, near: f32, far: f32) Mat
-// orthographicLhGl(w: f32, h: f32, near: f32, far: f32) Mat
-// orthographicRhGl(w: f32, h: f32, near: f32, far: f32) Mat
-// orthographicOffCenterLh(left: f32, right: f32, top: f32, bottom: f32, near: f32, far: f32) Mat
-// orthographicOffCenterRh(left: f32, right: f32, top: f32, bottom: f32, near: f32, far: f32) Mat
-// orthographicOffCenterLhGl(left: f32, right: f32, top: f32, bottom: f32, near: f32, far: f32) Mat
-// orthographicOffCenterRhGl(left: f32, right: f32, top: f32, bottom: f32, near: f32, far: f32) Mat
-// determinant(m: Mat) F32x4
-// inverse(m: Mat) Mat
-// inverseDet(m: Mat, det: ?*F32x4) Mat
-// matToQuat(m: Mat) Quat
-// matFromAxisAngle(axis: Vec, angle: f32) Mat
-// matFromNormAxisAngle(axis: Vec, angle: f32) Mat
-// matFromQuat(quat: Quat) Mat
-// matFromRollPitchYaw(pitch: f32, yaw: f32, roll: f32) Mat
-// matFromRollPitchYawV(angles: Vec) Mat
-//
-// loadMat(mem: []const f32) Mat
-// loadMat43(mem: []const f32) Mat
-// loadMat34(mem: []const f32) Mat
-// storeMat(mem: []f32, m: Mat) void
-// storeMat43(mem: []f32, m: Mat) void
-// storeMat34(mem: []f32, m: Mat) void
-//
-// matToArr(m: Mat) [16]f32
-// matToArr43(m: Mat) [12]f32
-// matToArr34(m: Mat) [12]f32
-//
-// ------------------------------------------------------------------------------
-// 5. Quaternion functions
-// ------------------------------------------------------------------------------
-//
-// qmul(q0: Quat, q1: Quat) Quat
-// qidentity() Quat
-// conjugate(quat: Quat) Quat
-// inverse(q: Quat) Quat
-// slerp(q0: Quat, q1: Quat, t: f32) Quat
-// slerpV(q0: Quat, q1: Quat, t: F32x4) Quat
-// quatToMat(quat: Quat) Mat
-// quatToAxisAngle(quat: Quat, axis: *Vec, angle: *f32) void
-// quatFromMat(m: Mat) Quat
-// quatFromAxisAngle(axis: Vec, angle: f32) Quat
-// quatFromNormAxisAngle(axis: Vec, angle: f32) Quat
-// quatFromRollPitchYaw(pitch: f32, yaw: f32, roll: f32) Quat
-// quatFromRollPitchYawV(angles: Vec) Quat
-//
-// ------------------------------------------------------------------------------
-// 6. Color functions
-// ------------------------------------------------------------------------------
-//
-// adjustSaturation(color: F32x4, saturation: f32) F32x4
-// adjustContrast(color: F32x4, contrast: f32) F32x4
-// rgbToHsl(rgb: F32x4) F32x4
-// hslToRgb(hsl: F32x4) F32x4
-// rgbToHsv(rgb: F32x4) F32x4
-// hsvToRgb(hsv: F32x4) F32x4
-// rgbToSrgb(rgb: F32x4) F32x4
-// srgbToRgb(srgb: F32x4) F32x4
-//
-// ------------------------------------------------------------------------------
-// X. Misc functions
-// ------------------------------------------------------------------------------
-//
-// linePointDistance(linept0: Vec, linept1: Vec, pt: Vec) F32x4
-// sin(v: f32) f32
-// cos(v: f32) f32
-// sincos(v: f32) [2]f32
-// asin(v: f32) f32
-// acos(v: f32) f32
-//
-// fftInitUnityTable(unitytable: []F32x4) void
-// fft(re: []F32x4, im: []F32x4, unitytable: []const F32x4) void
-// ifft(re: []F32x4, im: []const F32x4, unitytable: []const F32x4) void
-//
-// ==============================================================================
-
-// Fundamental types
-pub const F32x4 = @Vector(4, f32);
-pub const F32x8 = @Vector(8, f32);
-pub const F32x16 = @Vector(16, f32);
-pub const Boolx4 = @Vector(4, bool);
-pub const Boolx8 = @Vector(8, bool);
-pub const Boolx16 = @Vector(16, bool);
-
-// "Higher-level" aliases
-pub const Vec = F32x4;
-pub const Mat = [4]F32x4;
-pub const Quat = F32x4;
-
-const builtin = @import("builtin");
-const std = @import("std");
-const math = std.math;
-const assert = std.debug.assert;
-const expect = std.testing.expect;
-
-const cpu_arch = builtin.cpu.arch;
-const has_avx = if (cpu_arch == .x86_64) std.Target.x86.featureSetHas(builtin.cpu.features, .avx) else false;
-const has_avx512f = if (cpu_arch == .x86_64) std.Target.x86.featureSetHas(builtin.cpu.features, .avx512f) else false;
-const has_fma = if (cpu_arch == .x86_64) std.Target.x86.featureSetHas(builtin.cpu.features, .fma) else false;
-// ------------------------------------------------------------------------------
-//
-// 1. Initialization functions
-//
-// ------------------------------------------------------------------------------
-pub inline fn f32x4(e0: f32, e1: f32, e2: f32, e3: f32) F32x4 {
-    return .{ e0, e1, e2, e3 };
-}
-pub inline fn f32x8(e0: f32, e1: f32, e2: f32, e3: f32, e4: f32, e5: f32, e6: f32, e7: f32) F32x8 {
-    return .{ e0, e1, e2, e3, e4, e5, e6, e7 };
-}
-// zig fmt: off
-pub inline fn f32x16(
-    e0: f32, e1: f32, e2: f32, e3: f32, e4: f32, e5: f32, e6: f32, e7: f32,
-    e8: f32, e9: f32, ea: f32, eb: f32, ec: f32, ed: f32, ee: f32, ef: f32) F32x16 {
-    return .{ e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, ea, eb, ec, ed, ee, ef };
-}
-// zig fmt: on
-
-pub inline fn f32x4s(e0: f32) F32x4 {
-    return splat(F32x4, e0);
-}
-pub inline fn f32x8s(e0: f32) F32x8 {
-    return splat(F32x8, e0);
-}
-pub inline fn f32x16s(e0: f32) F32x16 {
-    return splat(F32x16, e0);
-}
-
-pub inline fn boolx4(e0: bool, e1: bool, e2: bool, e3: bool) Boolx4 {
-    return .{ e0, e1, e2, e3 };
-}
-pub inline fn boolx8(e0: bool, e1: bool, e2: bool, e3: bool, e4: bool, e5: bool, e6: bool, e7: bool) Boolx8 {
-    return .{ e0, e1, e2, e3, e4, e5, e6, e7 };
-}
-// zig fmt: off
-pub inline fn boolx16(
-    e0: bool, e1: bool, e2: bool, e3: bool, e4: bool, e5: bool, e6: bool, e7: bool,
-    e8: bool, e9: bool, ea: bool, eb: bool, ec: bool, ed: bool, ee: bool, ef: bool) Boolx16 {
-    return .{ e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, ea, eb, ec, ed, ee, ef };
-}
-// zig fmt: on
-
-pub inline fn veclen(comptime T: type) comptime_int {
-    return @typeInfo(T).Vector.len;
-}
-
-pub inline fn splat(comptime T: type, value: f32) T {
-    return @splat(veclen(T), value);
-}
-pub inline fn splatInt(comptime T: type, value: u32) T {
-    return @splat(veclen(T), @bitCast(f32, value));
-}
-
-pub fn load(mem: []const f32, comptime T: type, comptime len: u32) T {
-    var v = splat(T, 0.0);
-    comptime var loop_len = if (len == 0) veclen(T) else len;
-    comptime var i: u32 = 0;
-    inline while (i < loop_len) : (i += 1) {
-        v[i] = mem[i];
-    }
-    return v;
-}
-test "zmath.load" {
-    const a = [7]f32{ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0 };
-    var ptr = &a;
-    var i: u32 = 0;
-    const v0 = load(a[i..], F32x4, 2);
-    try expect(approxEqAbs(v0, F32x4{ 1.0, 2.0, 0.0, 0.0 }, 0.0));
-    i += 2;
-    const v1 = load(a[i .. i + 2], F32x4, 2);
-    try expect(approxEqAbs(v1, F32x4{ 3.0, 4.0, 0.0, 0.0 }, 0.0));
-    const v2 = load(a[5..7], F32x4, 2);
-    try expect(approxEqAbs(v2, F32x4{ 6.0, 7.0, 0.0, 0.0 }, 0.0));
-    const v3 = load(ptr[1..], F32x4, 2);
-    try expect(approxEqAbs(v3, F32x4{ 2.0, 3.0, 0.0, 0.0 }, 0.0));
-    i += 1;
-    const v4 = load(ptr[i .. i + 2], F32x4, 2);
-    try expect(approxEqAbs(v4, F32x4{ 4.0, 5.0, 0.0, 0.0 }, 0.0));
-}
-
-pub fn store(mem: []f32, v: anytype, comptime len: u32) void {
-    const T = @TypeOf(v);
-    comptime var loop_len = if (len == 0) veclen(T) else len;
-    comptime var i: u32 = 0;
-    inline while (i < loop_len) : (i += 1) {
-        mem[i] = v[i];
-    }
-}
-test "zmath.store" {
-    var a = [7]f32{ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0 };
-    const v = load(a[1..], F32x4, 3);
-    store(a[2..], v, 4);
-    try expect(a[0] == 1.0);
-    try expect(a[1] == 2.0);
-    try expect(a[2] == 2.0);
-    try expect(a[3] == 3.0);
-    try expect(a[4] == 4.0);
-    try expect(a[5] == 0.0);
-}
-
-pub inline fn loadArr2(arr: [2]f32) F32x4 {
-    return f32x4(arr[0], arr[1], 0.0, 0.0);
-}
-pub inline fn loadArr2zw(arr: [2]f32, z: f32, w: f32) F32x4 {
-    return f32x4(arr[0], arr[1], z, w);
-}
-pub inline fn loadArr3(arr: [3]f32) F32x4 {
-    return f32x4(arr[0], arr[1], arr[2], 0.0);
-}
-pub inline fn loadArr3w(arr: [3]f32, w: f32) F32x4 {
-    return f32x4(arr[0], arr[1], arr[2], w);
-}
-pub inline fn loadArr4(arr: [4]f32) F32x4 {
-    return f32x4(arr[0], arr[1], arr[2], arr[3]);
-}
-
-pub inline fn storeArr2(arr: *[2]f32, v: F32x4) void {
-    arr.* = .{ v[0], v[1] };
-}
-pub inline fn storeArr3(arr: *[3]f32, v: F32x4) void {
-    arr.* = .{ v[0], v[1], v[2] };
-}
-pub inline fn storeArr4(arr: *[4]f32, v: F32x4) void {
-    arr.* = .{ v[0], v[1], v[2], v[3] };
-}
-
-pub inline fn arr3Ptr(ptr: anytype) *const [3]f32 {
-    comptime assert(@typeInfo(@TypeOf(ptr)) == .Pointer);
-    const T = std.meta.Child(@TypeOf(ptr));
-    comptime assert(T == F32x4);
-    return @ptrCast(*const [3]f32, ptr);
-}
-
-pub inline fn arrNPtr(ptr: anytype) [*]const f32 {
-    comptime assert(@typeInfo(@TypeOf(ptr)) == .Pointer);
-    const T = std.meta.Child(@TypeOf(ptr));
-    comptime assert(T == Mat or T == F32x4 or T == F32x8 or T == F32x16);
-    return @ptrCast([*]const f32, ptr);
-}
-test "zmath.arrNPtr" {
-    {
-        const mat = identity();
-        const f32ptr = arrNPtr(&mat);
-        try expect(f32ptr[0] == 1.0);
-        try expect(f32ptr[5] == 1.0);
-        try expect(f32ptr[10] == 1.0);
-        try expect(f32ptr[15] == 1.0);
-    }
-    {
-        const v8 = f32x8s(1.0);
-        const f32ptr = arrNPtr(&v8);
-        try expect(f32ptr[1] == 1.0);
-        try expect(f32ptr[7] == 1.0);
-    }
-}
-
-test "zmath.loadArr" {
-    {
-        const camera_position = [3]f32{ 1.0, 2.0, 3.0 };
-        const simd_reg = loadArr3(camera_position);
-        try expect(approxEqAbs(simd_reg, f32x4(1.0, 2.0, 3.0, 0.0), 0.0));
-    }
-    {
-        const camera_position = [3]f32{ 1.0, 2.0, 3.0 };
-        const simd_reg = loadArr3w(camera_position, 1.0);
-        try expect(approxEqAbs(simd_reg, f32x4(1.0, 2.0, 3.0, 1.0), 0.0));
-    }
-}
-
-pub inline fn vecToArr2(v: Vec) [2]f32 {
-    return .{ v[0], v[1] };
-}
-pub inline fn vecToArr3(v: Vec) [3]f32 {
-    return .{ v[0], v[1], v[2] };
-}
-pub inline fn vecToArr4(v: Vec) [4]f32 {
-    return .{ v[0], v[1], v[2], v[3] };
-}
-// ------------------------------------------------------------------------------
-//
-// 2. Functions that work on all vector components (F32xN = F32x4 or F32x8 or F32x16)
-//
-// ------------------------------------------------------------------------------
-pub fn all(vb: anytype, comptime len: u32) bool {
-    const T = @TypeOf(vb);
-    if (len > veclen(T)) {
-        @compileError("zmath.all(): 'len' is greater than vector len of type " ++ @typeName(T));
-    }
-    comptime var loop_len = if (len == 0) veclen(T) else len;
-    const ab: [veclen(T)]bool = vb;
-    comptime var i: u32 = 0;
-    var result = true;
-    inline while (i < loop_len) : (i += 1) {
-        result = result and ab[i];
-    }
-    return result;
-}
-test "zmath.all" {
-    try expect(all(boolx8(true, true, true, true, true, false, true, false), 5) == true);
-    try expect(all(boolx8(true, true, true, true, true, false, true, false), 6) == false);
-    try expect(all(boolx8(true, true, true, true, false, false, false, false), 4) == true);
-    try expect(all(boolx4(true, true, true, false), 3) == true);
-    try expect(all(boolx4(true, true, true, false), 1) == true);
-    try expect(all(boolx4(true, false, false, false), 1) == true);
-    try expect(all(boolx4(false, true, false, false), 1) == false);
-    try expect(all(boolx8(true, true, true, true, true, false, true, false), 0) == false);
-    try expect(all(boolx4(false, true, false, false), 0) == false);
-    try expect(all(boolx4(true, true, true, true), 0) == true);
-}
-
-pub fn any(vb: anytype, comptime len: u32) bool {
-    const T = @TypeOf(vb);
-    if (len > veclen(T)) {
-        @compileError("zmath.any(): 'len' is greater than vector len of type " ++ @typeName(T));
-    }
-    comptime var loop_len = if (len == 0) veclen(T) else len;
-    const ab: [veclen(T)]bool = vb;
-    comptime var i: u32 = 0;
-    var result = false;
-    inline while (i < loop_len) : (i += 1) {
-        result = result or ab[i];
-    }
-    return result;
-}
-test "zmath.any" {
-    try expect(any(boolx8(true, true, true, true, true, false, true, false), 0) == true);
-    try expect(any(boolx8(false, false, false, true, true, false, true, false), 3) == false);
-    try expect(any(boolx8(false, false, false, false, false, true, false, false), 4) == false);
-}
-
-pub inline fn isNearEqual(
-    v0: anytype,
-    v1: anytype,
-    epsilon: anytype,
-) @Vector(veclen(@TypeOf(v0)), bool) {
-    const T = @TypeOf(v0, v1, epsilon);
-    const delta = v0 - v1;
-    const temp = maxFast(delta, splat(T, 0.0) - delta);
-    return temp <= epsilon;
-}
-test "zmath.isNearEqual" {
-    if (builtin.target.os.tag == .macos and builtin.zig_backend != .stage1) return error.SkipZigTest;
-    {
-        const v0 = f32x4(1.0, 2.0, -3.0, 4.001);
-        const v1 = f32x4(1.0, 2.1, 3.0, 4.0);
-        const b = isNearEqual(v0, v1, splat(F32x4, 0.01));
-        try expect(@reduce(.And, b == boolx4(true, false, false, true)));
-    }
-    {
-        const v0 = f32x8(1.0, 2.0, -3.0, 4.001, 1.001, 2.3, -0.0, 0.0);
-        const v1 = f32x8(1.0, 2.1, 3.0, 4.0, -1.001, 2.1, 0.0, 0.0);
-        const b = isNearEqual(v0, v1, splat(F32x8, 0.01));
-        try expect(@reduce(.And, b == boolx8(true, false, false, true, false, false, true, true)));
-    }
-    try expect(all(isNearEqual(
-        splat(F32x4, math.inf_f32),
-        splat(F32x4, math.inf_f32),
-        splat(F32x4, 0.0001),
-    ), 0) == false);
-    try expect(all(isNearEqual(
-        splat(F32x4, -math.inf_f32),
-        splat(F32x4, math.inf_f32),
-        splat(F32x4, 0.0001),
-    ), 0) == false);
-    try expect(all(isNearEqual(
-        splat(F32x4, -math.inf_f32),
-        splat(F32x4, -math.inf_f32),
-        splat(F32x4, 0.0001),
-    ), 0) == false);
-    try expect(all(isNearEqual(
-        splat(F32x4, -math.nan_f32),
-        splat(F32x4, math.inf_f32),
-        splat(F32x4, 0.0001),
-    ), 0) == false);
-}
-
-pub inline fn isNan(
-    v: anytype,
-) @Vector(veclen(@TypeOf(v)), bool) {
-    return v != v;
-}
-test "zmath.isNan" {
-    {
-        const v0 = f32x4(math.inf_f32, math.nan_f32, math.nan_f32, 7.0);
-        const b = isNan(v0);
-        try expect(@reduce(.And, b == boolx4(false, true, true, false)));
-    }
-    {
-        const v0 = f32x8(0, math.nan_f32, 0, 0, math.inf_f32, math.nan_f32, math.qnan_f32, 7.0);
-        const b = isNan(v0);
-        try expect(@reduce(.And, b == boolx8(false, true, false, false, false, true, true, false)));
-    }
-}
-
-pub inline fn isInf(
-    v: anytype,
-) @Vector(veclen(@TypeOf(v)), bool) {
-    const T = @TypeOf(v);
-    return abs(v) == splat(T, math.inf_f32);
-}
-test "zmath.isInf" {
-    {
-        const v0 = f32x4(math.inf_f32, math.nan_f32, math.qnan_f32, 7.0);
-        const b = isInf(v0);
-        try expect(@reduce(.And, b == boolx4(true, false, false, false)));
-    }
-    {
-        const v0 = f32x8(0, math.inf_f32, 0, 0, math.inf_f32, math.nan_f32, math.qnan_f32, 7.0);
-        const b = isInf(v0);
-        try expect(@reduce(.And, b == boolx8(false, true, false, false, true, false, false, false)));
-    }
-}
-
-pub inline fn isInBounds(
-    v: anytype,
-    bounds: anytype,
-) @Vector(veclen(@TypeOf(v)), bool) {
-    const T = @TypeOf(v, bounds);
-    const Tu = @Vector(veclen(T), u1);
-    const Tr = @Vector(veclen(T), bool);
-
-    // 2 x cmpleps, xorps, load, andps
-    const b0 = v <= bounds;
-    const b1 = (bounds * splat(T, -1.0)) <= v;
-    const b0u = @bitCast(Tu, b0);
-    const b1u = @bitCast(Tu, b1);
-    return @bitCast(Tr, b0u & b1u);
-}
-test "zmath.isInBounds" {
-    {
-        const v0 = f32x4(0.5, -2.0, -1.0, 1.9);
-        const v1 = f32x4(-1.6, -2.001, -1.0, 1.9);
-        const bounds = f32x4(1.0, 2.0, 1.0, 2.0);
-        const b0 = isInBounds(v0, bounds);
-        const b1 = isInBounds(v1, bounds);
-        try expect(@reduce(.And, b0 == boolx4(true, true, true, true)));
-        try expect(@reduce(.And, b1 == boolx4(false, false, true, true)));
-    }
-    {
-        const v0 = f32x8(2.0, 1.0, 2.0, 1.0, 0.5, -2.0, -1.0, 1.9);
-        const bounds = f32x8(1.0, 1.0, 1.0, math.inf_f32, 1.0, math.nan_f32, 1.0, 2.0);
-        const b0 = isInBounds(v0, bounds);
-        try expect(@reduce(.And, b0 == boolx8(false, true, false, true, true, false, true, true)));
-    }
-}
-
-pub inline fn andInt(v0: anytype, v1: anytype) @TypeOf(v0, v1) {
-    const T = @TypeOf(v0, v1);
-    const Tu = @Vector(veclen(T), u32);
-    const v0u = @bitCast(Tu, v0);
-    const v1u = @bitCast(Tu, v1);
-    return @bitCast(T, v0u & v1u); // andps
-}
-test "zmath.andInt" {
-    {
-        const v0 = f32x4(0, @bitCast(f32, ~@as(u32, 0)), 0, @bitCast(f32, ~@as(u32, 0)));
-        const v1 = f32x4(1.0, 2.0, 3.0, math.inf_f32);
-        const v = andInt(v0, v1);
-        try expect(v[3] == math.inf_f32);
-        try expect(approxEqAbs(v, f32x4(0.0, 2.0, 0.0, math.inf_f32), 0.0));
-    }
-    {
-        const v0 = f32x8(0, 0, 0, 0, 0, @bitCast(f32, ~@as(u32, 0)), 0, @bitCast(f32, ~@as(u32, 0)));
-        const v1 = f32x8(0, 0, 0, 0, 1.0, 2.0, 3.0, math.inf_f32);
-        const v = andInt(v0, v1);
-        try expect(v[7] == math.inf_f32);
-        try expect(approxEqAbs(v, f32x8(0, 0, 0, 0, 0.0, 2.0, 0.0, math.inf_f32), 0.0));
-    }
-}
-
-pub inline fn andNotInt(v0: anytype, v1: anytype) @TypeOf(v0, v1) {
-    const T = @TypeOf(v0, v1);
-    const Tu = @Vector(veclen(T), u32);
-    const v0u = @bitCast(Tu, v0);
-    const v1u = @bitCast(Tu, v1);
-    return @bitCast(T, ~v0u & v1u); // andnps
-}
-test "zmath.andNotInt" {
-    {
-        const v0 = f32x4(1.0, 2.0, 3.0, 4.0);
-        const v1 = f32x4(0, @bitCast(f32, ~@as(u32, 0)), 0, @bitCast(f32, ~@as(u32, 0)));
-        const v = andNotInt(v1, v0);
-        try expect(approxEqAbs(v, f32x4(1.0, 0.0, 3.0, 0.0), 0.0));
-    }
-    {
-        const v0 = f32x8(0, 0, 0, 0, 1.0, 2.0, 3.0, 4.0);
-        const v1 = f32x8(0, 0, 0, 0, 0, @bitCast(f32, ~@as(u32, 0)), 0, @bitCast(f32, ~@as(u32, 0)));
-        const v = andNotInt(v1, v0);
-        try expect(approxEqAbs(v, f32x8(0, 0, 0, 0, 1.0, 0.0, 3.0, 0.0), 0.0));
-    }
-}
-
-pub inline fn orInt(v0: anytype, v1: anytype) @TypeOf(v0, v1) {
-    const T = @TypeOf(v0, v1);
-    const Tu = @Vector(veclen(T), u32);
-    const v0u = @bitCast(Tu, v0);
-    const v1u = @bitCast(Tu, v1);
-    return @bitCast(T, v0u | v1u); // orps
-}
-test "zmath.orInt" {
-    {
-        const v0 = f32x4(0, @bitCast(f32, ~@as(u32, 0)), 0, 0);
-        const v1 = f32x4(1.0, 2.0, 3.0, 4.0);
-        const v = orInt(v0, v1);
-        try expect(v[0] == 1.0);
-        try expect(@bitCast(u32, v[1]) == ~@as(u32, 0));
-        try expect(v[2] == 3.0);
-        try expect(v[3] == 4.0);
-    }
-    {
-        const v0 = f32x8(0, 0, 0, 0, 0, @bitCast(f32, ~@as(u32, 0)), 0, 0);
-        const v1 = f32x8(0, 0, 0, 0, 1.0, 2.0, 3.0, 4.0);
-        const v = orInt(v0, v1);
-        try expect(v[4] == 1.0);
-        try expect(@bitCast(u32, v[5]) == ~@as(u32, 0));
-        try expect(v[6] == 3.0);
-        try expect(v[7] == 4.0);
-    }
-}
-
-pub inline fn norInt(v0: anytype, v1: anytype) @TypeOf(v0, v1) {
-    const T = @TypeOf(v0, v1);
-    const Tu = @Vector(veclen(T), u32);
-    const v0u = @bitCast(Tu, v0);
-    const v1u = @bitCast(Tu, v1);
-    return @bitCast(T, ~(v0u | v1u)); // por, pcmpeqd, pxor
-}
-
-pub inline fn xorInt(v0: anytype, v1: anytype) @TypeOf(v0, v1) {
-    const T = @TypeOf(v0, v1);
-    const Tu = @Vector(veclen(T), u32);
-    const v0u = @bitCast(Tu, v0);
-    const v1u = @bitCast(Tu, v1);
-    return @bitCast(T, v0u ^ v1u); // xorps
-}
-test "zmath.xorInt" {
-    {
-        const v0 = f32x4(1.0, @bitCast(f32, ~@as(u32, 0)), 0, 0);
-        const v1 = f32x4(1.0, 0, 0, 0);
-        const v = xorInt(v0, v1);
-        try expect(v[0] == 0.0);
-        try expect(@bitCast(u32, v[1]) == ~@as(u32, 0));
-        try expect(v[2] == 0.0);
-        try expect(v[3] == 0.0);
-    }
-    {
-        const v0 = f32x8(0, 0, 0, 0, 1.0, @bitCast(f32, ~@as(u32, 0)), 0, 0);
-        const v1 = f32x8(0, 0, 0, 0, 1.0, 0, 0, 0);
-        const v = xorInt(v0, v1);
-        try expect(v[4] == 0.0);
-        try expect(@bitCast(u32, v[5]) == ~@as(u32, 0));
-        try expect(v[6] == 0.0);
-        try expect(v[7] == 0.0);
-    }
-}
-
-pub inline fn minFast(v0: anytype, v1: anytype) @TypeOf(v0, v1) {
-    return select(v0 < v1, v0, v1); // minps
-}
-test "zmath.minFast" {
-    {
-        const v0 = f32x4(1.0, 3.0, 2.0, 7.0);
-        const v1 = f32x4(2.0, 1.0, 4.0, math.inf_f32);
-        const v = minFast(v0, v1);
-        try expect(approxEqAbs(v, f32x4(1.0, 1.0, 2.0, 7.0), 0.0));
-    }
-    {
-        const v0 = f32x4(1.0, math.nan_f32, 5.0, math.qnan_f32);
-        const v1 = f32x4(2.0, 1.0, 4.0, math.inf_f32);
-        const v = minFast(v0, v1);
-        try expect(v[0] == 1.0);
-        try expect(v[1] == 1.0);
-        try expect(!math.isNan(v[1]));
-        try expect(v[2] == 4.0);
-        try expect(v[3] == math.inf_f32);
-        try expect(!math.isNan(v[3]));
-    }
-}
-
-pub inline fn maxFast(v0: anytype, v1: anytype) @TypeOf(v0, v1) {
-    return select(v0 > v1, v0, v1); // maxps
-}
-test "zmath.maxFast" {
-    {
-        const v0 = f32x4(1.0, 3.0, 2.0, 7.0);
-        const v1 = f32x4(2.0, 1.0, 4.0, math.inf_f32);
-        const v = maxFast(v0, v1);
-        try expect(approxEqAbs(v, f32x4(2.0, 3.0, 4.0, math.inf_f32), 0.0));
-    }
-    {
-        const v0 = f32x4(1.0, math.nan_f32, 5.0, math.qnan_f32);
-        const v1 = f32x4(2.0, 1.0, 4.0, math.inf_f32);
-        const v = maxFast(v0, v1);
-        try expect(v[0] == 2.0);
-        try expect(v[1] == 1.0);
-        try expect(v[2] == 5.0);
-        try expect(v[3] == math.inf_f32);
-        try expect(!math.isNan(v[3]));
-    }
-}
-
-pub inline fn min(v0: anytype, v1: anytype) @TypeOf(v0, v1) {
-    // This will handle inf & nan
-    return @min(v0, v1); // minps, cmpunordps, andps, andnps, orps
-}
-test "zmath.min" {
-    if (builtin.target.os.tag == .macos) return error.SkipZigTest;
-    {
-        const v0 = f32x4(1.0, 3.0, 2.0, 7.0);
-        const v1 = f32x4(2.0, 1.0, 4.0, math.inf_f32);
-        const v = min(v0, v1);
-        try expect(approxEqAbs(v, f32x4(1.0, 1.0, 2.0, 7.0), 0.0));
-    }
-    {
-        const v0 = f32x8(0, 0, -2.0, 0, 1.0, 3.0, 2.0, 7.0);
-        const v1 = f32x8(0, 1.0, 0, 0, 2.0, 1.0, 4.0, math.inf_f32);
-        const v = min(v0, v1);
-        try expect(approxEqAbs(v, f32x8(0.0, 0.0, -2.0, 0.0, 1.0, 1.0, 2.0, 7.0), 0.0));
-    }
-    {
-        const v0 = f32x4(1.0, math.nan_f32, 5.0, math.qnan_f32);
-        const v1 = f32x4(2.0, 1.0, 4.0, math.inf_f32);
-        const v = min(v0, v1);
-        try expect(v[0] == 1.0);
-        try expect(v[1] == 1.0);
-        try expect(!math.isNan(v[1]));
-        try expect(v[2] == 4.0);
-        try expect(v[3] == math.inf_f32);
-        try expect(!math.isNan(v[3]));
-    }
-    {
-        const v0 = f32x4(-math.inf_f32, math.inf_f32, math.inf_f32, math.qnan_f32);
-        const v1 = f32x4(math.qnan_f32, -math.inf_f32, math.qnan_f32, math.nan_f32);
-        const v = min(v0, v1);
-        try expect(v[0] == -math.inf_f32);
-        try expect(v[1] == -math.inf_f32);
-        try expect(v[2] == math.inf_f32);
-        try expect(!math.isNan(v[2]));
-        try expect(math.isNan(v[3]));
-        try expect(!math.isInf(v[3]));
-    }
-}
-
-pub inline fn max(v0: anytype, v1: anytype) @TypeOf(v0, v1) {
-    // This will handle inf & nan
-    return @max(v0, v1); // maxps, cmpunordps, andps, andnps, orps
-}
-test "zmath.max" {
-    if (builtin.target.os.tag == .macos) return error.SkipZigTest;
-    {
-        const v0 = f32x4(1.0, 3.0, 2.0, 7.0);
-        const v1 = f32x4(2.0, 1.0, 4.0, math.inf_f32);
-        const v = max(v0, v1);
-        try expect(approxEqAbs(v, f32x4(2.0, 3.0, 4.0, math.inf_f32), 0.0));
-    }
-    {
-        const v0 = f32x8(0, 0, -2.0, 0, 1.0, 3.0, 2.0, 7.0);
-        const v1 = f32x8(0, 1.0, 0, 0, 2.0, 1.0, 4.0, math.inf_f32);
-        const v = max(v0, v1);
-        try expect(approxEqAbs(v, f32x8(0.0, 1.0, 0.0, 0.0, 2.0, 3.0, 4.0, math.inf_f32), 0.0));
-    }
-    {
-        const v0 = f32x4(1.0, math.nan_f32, 5.0, math.qnan_f32);
-        const v1 = f32x4(2.0, 1.0, 4.0, math.inf_f32);
-        const v = max(v0, v1);
-        try expect(v[0] == 2.0);
-        try expect(v[1] == 1.0);
-        try expect(v[2] == 5.0);
-        try expect(v[3] == math.inf_f32);
-        try expect(!math.isNan(v[3]));
-    }
-    {
-        const v0 = f32x4(-math.inf_f32, math.inf_f32, math.inf_f32, math.qnan_f32);
-        const v1 = f32x4(math.qnan_f32, -math.inf_f32, math.qnan_f32, math.nan_f32);
-        const v = max(v0, v1);
-        try expect(v[0] == -math.inf_f32);
-        try expect(v[1] == math.inf_f32);
-        try expect(v[2] == math.inf_f32);
-        try expect(!math.isNan(v[2]));
-        try expect(math.isNan(v[3]));
-        try expect(!math.isInf(v[3]));
-    }
-}
-
-pub fn round(v: anytype) @TypeOf(v) {
-    const T = @TypeOf(v);
-    if (cpu_arch == .x86_64 and has_avx) {
-        if (T == F32x4) {
-            return asm ("vroundps $0, %%xmm0, %%xmm0"
-                : [ret] "={xmm0}" (-> T),
-                : [v] "{xmm0}" (v),
-            );
-        } else if (T == F32x8) {
-            return asm ("vroundps $0, %%ymm0, %%ymm0"
-                : [ret] "={ymm0}" (-> T),
-                : [v] "{ymm0}" (v),
-            );
-        } else if (T == F32x16 and has_avx512f) {
-            return asm ("vrndscaleps $0, %%zmm0, %%zmm0"
-                : [ret] "={zmm0}" (-> T),
-                : [v] "{zmm0}" (v),
-            );
-        } else if (T == F32x16 and !has_avx512f) {
-            const arr: [16]f32 = v;
-            var ymm0 = @as(F32x8, arr[0..8].*);
-            var ymm1 = @as(F32x8, arr[8..16].*);
-            ymm0 = asm ("vroundps $0, %%ymm0, %%ymm0"
-                : [ret] "={ymm0}" (-> F32x8),
-                : [v] "{ymm0}" (ymm0),
-            );
-            ymm1 = asm ("vroundps $0, %%ymm1, %%ymm1"
-                : [ret] "={ymm1}" (-> F32x8),
-                : [v] "{ymm1}" (ymm1),
-            );
-            return @shuffle(f32, ymm0, ymm1, [16]i32{ 0, 1, 2, 3, 4, 5, 6, 7, -1, -2, -3, -4, -5, -6, -7, -8 });
-        }
-    } else {
-        const sign = andInt(v, splatNegativeZero(T));
-        const magic = orInt(splatNoFraction(T), sign);
-        var r1 = v + magic;
-        r1 = r1 - magic;
-        const r2 = abs(v);
-        const mask = r2 <= splatNoFraction(T);
-        return select(mask, r1, v);
-    }
-}
-test "zmath.round" {
-    {
-        try expect(all(round(splat(F32x4, math.inf_f32)) == splat(F32x4, math.inf_f32), 0));
-        try expect(all(round(splat(F32x4, -math.inf_f32)) == splat(F32x4, -math.inf_f32), 0));
-        try expect(all(isNan(round(splat(F32x4, math.nan_f32))), 0));
-        try expect(all(isNan(round(splat(F32x4, -math.nan_f32))), 0));
-        try expect(all(isNan(round(splat(F32x4, math.qnan_f32))), 0));
-        try expect(all(isNan(round(splat(F32x4, -math.qnan_f32))), 0));
-    }
-    {
-        var v = round(f32x16(1.1, -1.1, -1.5, 1.5, 2.1, 2.8, 2.9, 4.1, 5.8, 6.1, 7.9, 8.9, 10.1, 11.2, 12.7, 13.1));
-        try expect(approxEqAbs(
-            v,
-            f32x16(1.0, -1.0, -2.0, 2.0, 2.0, 3.0, 3.0, 4.0, 6.0, 6.0, 8.0, 9.0, 10.0, 11.0, 13.0, 13.0),
-            0.0,
-        ));
-    }
-    var v = round(f32x4(1.1, -1.1, -1.5, 1.5));
-    try expect(approxEqAbs(v, f32x4(1.0, -1.0, -2.0, 2.0), 0.0));
-
-    const v1 = f32x4(-10_000_000.1, -math.inf_f32, 10_000_001.5, math.inf_f32);
-    v = round(v1);
-    try expect(v[3] == math.inf_f32);
-    try expect(approxEqAbs(v, f32x4(-10_000_000.1, -math.inf_f32, 10_000_001.5, math.inf_f32), 0.0));
-
-    const v2 = f32x4(-math.qnan_f32, math.qnan_f32, math.nan_f32, -math.inf_f32);
-    v = round(v2);
-    try expect(math.isNan(v2[0]));
-    try expect(math.isNan(v2[1]));
-    try expect(math.isNan(v2[2]));
-    try expect(v2[3] == -math.inf_f32);
-
-    const v3 = f32x4(1001.5, -201.499, -10000.99, -101.5);
-    v = round(v3);
-    try expect(approxEqAbs(v, f32x4(1002.0, -201.0, -10001.0, -102.0), 0.0));
-
-    const v4 = f32x4(-1_388_609.9, 1_388_609.5, 1_388_109.01, 2_388_609.5);
-    v = round(v4);
-    try expect(approxEqAbs(v, f32x4(-1_388_610.0, 1_388_610.0, 1_388_109.0, 2_388_610.0), 0.0));
-
-    var f: f32 = -100.0;
-    var i: u32 = 0;
-    while (i < 100) : (i += 1) {
-        const vr = round(splat(F32x4, f));
-        const fr = @round(splat(F32x4, f));
-        const vr8 = round(splat(F32x8, f));
-        const fr8 = @round(splat(F32x8, f));
-        const vr16 = round(splat(F32x16, f));
-        const fr16 = @round(splat(F32x16, f));
-        try expect(approxEqAbs(vr, fr, 0.0));
-        try expect(approxEqAbs(vr8, fr8, 0.0));
-        try expect(approxEqAbs(vr16, fr16, 0.0));
-        f += 0.12345 * @intToFloat(f32, i);
-    }
-}
-
-pub fn trunc(v: anytype) @TypeOf(v) {
-    const T = @TypeOf(v);
-    if (cpu_arch == .x86_64 and has_avx) {
-        if (T == F32x4) {
-            return asm ("vroundps $3, %%xmm0, %%xmm0"
-                : [ret] "={xmm0}" (-> T),
-                : [v] "{xmm0}" (v),
-            );
-        } else if (T == F32x8) {
-            return asm ("vroundps $3, %%ymm0, %%ymm0"
-                : [ret] "={ymm0}" (-> T),
-                : [v] "{ymm0}" (v),
-            );
-        } else if (T == F32x16 and has_avx512f) {
-            return asm ("vrndscaleps $3, %%zmm0, %%zmm0"
-                : [ret] "={zmm0}" (-> T),
-                : [v] "{zmm0}" (v),
-            );
-        } else if (T == F32x16 and !has_avx512f) {
-            const arr: [16]f32 = v;
-            var ymm0 = @as(F32x8, arr[0..8].*);
-            var ymm1 = @as(F32x8, arr[8..16].*);
-            ymm0 = asm ("vroundps $3, %%ymm0, %%ymm0"
-                : [ret] "={ymm0}" (-> F32x8),
-                : [v] "{ymm0}" (ymm0),
-            );
-            ymm1 = asm ("vroundps $3, %%ymm1, %%ymm1"
-                : [ret] "={ymm1}" (-> F32x8),
-                : [v] "{ymm1}" (ymm1),
-            );
-            return @shuffle(f32, ymm0, ymm1, [16]i32{ 0, 1, 2, 3, 4, 5, 6, 7, -1, -2, -3, -4, -5, -6, -7, -8 });
-        }
-    } else {
-        const mask = abs(v) < splatNoFraction(T);
-        const result = floatToIntAndBack(v);
-        return select(mask, result, v);
-    }
-}
-test "zmath.trunc" {
-    {
-        try expect(all(trunc(splat(F32x4, math.inf_f32)) == splat(F32x4, math.inf_f32), 0));
-        try expect(all(trunc(splat(F32x4, -math.inf_f32)) == splat(F32x4, -math.inf_f32), 0));
-        try expect(all(isNan(trunc(splat(F32x4, math.nan_f32))), 0));
-        try expect(all(isNan(trunc(splat(F32x4, -math.nan_f32))), 0));
-        try expect(all(isNan(trunc(splat(F32x4, math.qnan_f32))), 0));
-        try expect(all(isNan(trunc(splat(F32x4, -math.qnan_f32))), 0));
-    }
-    {
-        var v = trunc(f32x16(1.1, -1.1, -1.5, 1.5, 2.1, 2.8, 2.9, 4.1, 5.8, 6.1, 7.9, 8.9, 10.1, 11.2, 12.7, 13.1));
-        try expect(approxEqAbs(
-            v,
-            f32x16(1.0, -1.0, -1.0, 1.0, 2.0, 2.0, 2.0, 4.0, 5.0, 6.0, 7.0, 8.0, 10.0, 11.0, 12.0, 13.0),
-            0.0,
-        ));
-    }
-    var v = trunc(f32x4(1.1, -1.1, -1.5, 1.5));
-    try expect(approxEqAbs(v, f32x4(1.0, -1.0, -1.0, 1.0), 0.0));
-
-    v = trunc(f32x4(-10_000_002.1, -math.inf_f32, 10_000_001.5, math.inf_f32));
-    try expect(approxEqAbs(v, f32x4(-10_000_002.1, -math.inf_f32, 10_000_001.5, math.inf_f32), 0.0));
-
-    v = trunc(f32x4(-math.qnan_f32, math.qnan_f32, math.nan_f32, -math.inf_f32));
-    try expect(math.isNan(v[0]));
-    try expect(math.isNan(v[1]));
-    try expect(math.isNan(v[2]));
-    try expect(v[3] == -math.inf_f32);
-
-    v = trunc(f32x4(1000.5001, -201.499, -10000.99, 100.750001));
-    try expect(approxEqAbs(v, f32x4(1000.0, -201.0, -10000.0, 100.0), 0.0));
-
-    v = trunc(f32x4(-7_388_609.5, 7_388_609.1, 8_388_109.5, -8_388_509.5));
-    try expect(approxEqAbs(v, f32x4(-7_388_609.0, 7_388_609.0, 8_388_109.0, -8_388_509.0), 0.0));
-
-    var f: f32 = -100.0;
-    var i: u32 = 0;
-    while (i < 100) : (i += 1) {
-        const vr = trunc(splat(F32x4, f));
-        const fr = @trunc(splat(F32x4, f));
-        const vr8 = trunc(splat(F32x8, f));
-        const fr8 = @trunc(splat(F32x8, f));
-        const vr16 = trunc(splat(F32x16, f));
-        const fr16 = @trunc(splat(F32x16, f));
-        try expect(approxEqAbs(vr, fr, 0.0));
-        try expect(approxEqAbs(vr8, fr8, 0.0));
-        try expect(approxEqAbs(vr16, fr16, 0.0));
-        f += 0.12345 * @intToFloat(f32, i);
-    }
-}
-
-pub fn floor(v: anytype) @TypeOf(v) {
-    const T = @TypeOf(v);
-    if (cpu_arch == .x86_64 and has_avx) {
-        if (T == F32x4) {
-            return asm ("vroundps $1, %%xmm0, %%xmm0"
-                : [ret] "={xmm0}" (-> T),
-                : [v] "{xmm0}" (v),
-            );
-        } else if (T == F32x8) {
-            return asm ("vroundps $1, %%ymm0, %%ymm0"
-                : [ret] "={ymm0}" (-> T),
-                : [v] "{ymm0}" (v),
-            );
-        } else if (T == F32x16 and has_avx512f) {
-            return asm ("vrndscaleps $1, %%zmm0, %%zmm0"
-                : [ret] "={zmm0}" (-> T),
-                : [v] "{zmm0}" (v),
-            );
-        } else if (T == F32x16 and !has_avx512f) {
-            const arr: [16]f32 = v;
-            var ymm0 = @as(F32x8, arr[0..8].*);
-            var ymm1 = @as(F32x8, arr[8..16].*);
-            ymm0 = asm ("vroundps $1, %%ymm0, %%ymm0"
-                : [ret] "={ymm0}" (-> F32x8),
-                : [v] "{ymm0}" (ymm0),
-            );
-            ymm1 = asm ("vroundps $1, %%ymm1, %%ymm1"
-                : [ret] "={ymm1}" (-> F32x8),
-                : [v] "{ymm1}" (ymm1),
-            );
-            return @shuffle(f32, ymm0, ymm1, [16]i32{ 0, 1, 2, 3, 4, 5, 6, 7, -1, -2, -3, -4, -5, -6, -7, -8 });
-        }
-    } else {
-        const mask = abs(v) < splatNoFraction(T);
-        var result = floatToIntAndBack(v);
-        const larger_mask = result > v;
-        const larger = select(larger_mask, splat(T, -1.0), splat(T, 0.0));
-        result = result + larger;
-        return select(mask, result, v);
-    }
-}
-test "zmath.floor" {
-    {
-        try expect(all(floor(splat(F32x4, math.inf_f32)) == splat(F32x4, math.inf_f32), 0));
-        try expect(all(floor(splat(F32x4, -math.inf_f32)) == splat(F32x4, -math.inf_f32), 0));
-        try expect(all(isNan(floor(splat(F32x4, math.nan_f32))), 0));
-        try expect(all(isNan(floor(splat(F32x4, -math.nan_f32))), 0));
-        try expect(all(isNan(floor(splat(F32x4, math.qnan_f32))), 0));
-        try expect(all(isNan(floor(splat(F32x4, -math.qnan_f32))), 0));
-    }
-    {
-        var v = floor(f32x16(1.1, -1.1, -1.5, 1.5, 2.1, 2.8, 2.9, 4.1, 5.8, 6.1, 7.9, 8.9, 10.1, 11.2, 12.7, 13.1));
-        try expect(approxEqAbs(
-            v,
-            f32x16(1.0, -2.0, -2.0, 1.0, 2.0, 2.0, 2.0, 4.0, 5.0, 6.0, 7.0, 8.0, 10.0, 11.0, 12.0, 13.0),
-            0.0,
-        ));
-    }
-    var v = floor(f32x4(1.5, -1.5, -1.7, -2.1));
-    try expect(approxEqAbs(v, f32x4(1.0, -2.0, -2.0, -3.0), 0.0));
-
-    v = floor(f32x4(-10_000_002.1, -math.inf_f32, 10_000_001.5, math.inf_f32));
-    try expect(approxEqAbs(v, f32x4(-10_000_002.1, -math.inf_f32, 10_000_001.5, math.inf_f32), 0.0));
-
-    v = floor(f32x4(-math.qnan_f32, math.qnan_f32, math.nan_f32, -math.inf_f32));
-    try expect(math.isNan(v[0]));
-    try expect(math.isNan(v[1]));
-    try expect(math.isNan(v[2]));
-    try expect(v[3] == -math.inf_f32);
-
-    v = floor(f32x4(1000.5001, -201.499, -10000.99, 100.75001));
-    try expect(approxEqAbs(v, f32x4(1000.0, -202.0, -10001.0, 100.0), 0.0));
-
-    v = floor(f32x4(-7_388_609.5, 7_388_609.1, 8_388_109.5, -8_388_509.5));
-    try expect(approxEqAbs(v, f32x4(-7_388_610.0, 7_388_609.0, 8_388_109.0, -8_388_510.0), 0.0));
-
-    var f: f32 = -100.0;
-    var i: u32 = 0;
-    while (i < 100) : (i += 1) {
-        const vr = floor(splat(F32x4, f));
-        const fr = @floor(splat(F32x4, f));
-        const vr8 = floor(splat(F32x8, f));
-        const fr8 = @floor(splat(F32x8, f));
-        const vr16 = floor(splat(F32x16, f));
-        const fr16 = @floor(splat(F32x16, f));
-        try expect(approxEqAbs(vr, fr, 0.0));
-        try expect(approxEqAbs(vr8, fr8, 0.0));
-        try expect(approxEqAbs(vr16, fr16, 0.0));
-        f += 0.12345 * @intToFloat(f32, i);
-    }
-}
-
-pub fn ceil(v: anytype) @TypeOf(v) {
-    const T = @TypeOf(v);
-    if (cpu_arch == .x86_64 and has_avx) {
-        if (T == F32x4) {
-            return asm ("vroundps $2, %%xmm0, %%xmm0"
-                : [ret] "={xmm0}" (-> T),
-                : [v] "{xmm0}" (v),
-            );
-        } else if (T == F32x8) {
-            return asm ("vroundps $2, %%ymm0, %%ymm0"
-                : [ret] "={ymm0}" (-> T),
-                : [v] "{ymm0}" (v),
-            );
-        } else if (T == F32x16 and has_avx512f) {
-            return asm ("vrndscaleps $2, %%zmm0, %%zmm0"
-                : [ret] "={zmm0}" (-> T),
-                : [v] "{zmm0}" (v),
-            );
-        } else if (T == F32x16 and !has_avx512f) {
-            const arr: [16]f32 = v;
-            var ymm0 = @as(F32x8, arr[0..8].*);
-            var ymm1 = @as(F32x8, arr[8..16].*);
-            ymm0 = asm ("vroundps $2, %%ymm0, %%ymm0"
-                : [ret] "={ymm0}" (-> F32x8),
-                : [v] "{ymm0}" (ymm0),
-            );
-            ymm1 = asm ("vroundps $2, %%ymm1, %%ymm1"
-                : [ret] "={ymm1}" (-> F32x8),
-                : [v] "{ymm1}" (ymm1),
-            );
-            return @shuffle(f32, ymm0, ymm1, [16]i32{ 0, 1, 2, 3, 4, 5, 6, 7, -1, -2, -3, -4, -5, -6, -7, -8 });
-        }
-    } else {
-        const mask = abs(v) < splatNoFraction(T);
-        var result = floatToIntAndBack(v);
-        const smaller_mask = result < v;
-        const smaller = select(smaller_mask, splat(T, -1.0), splat(T, 0.0));
-        result = result - smaller;
-        return select(mask, result, v);
-    }
-}
-test "zmath.ceil" {
-    {
-        try expect(all(ceil(splat(F32x4, math.inf_f32)) == splat(F32x4, math.inf_f32), 0));
-        try expect(all(ceil(splat(F32x4, -math.inf_f32)) == splat(F32x4, -math.inf_f32), 0));
-        try expect(all(isNan(ceil(splat(F32x4, math.nan_f32))), 0));
-        try expect(all(isNan(ceil(splat(F32x4, -math.nan_f32))), 0));
-        try expect(all(isNan(ceil(splat(F32x4, math.qnan_f32))), 0));
-        try expect(all(isNan(ceil(splat(F32x4, -math.qnan_f32))), 0));
-    }
-    {
-        var v = ceil(f32x16(1.1, -1.1, -1.5, 1.5, 2.1, 2.8, 2.9, 4.1, 5.8, 6.1, 7.9, 8.9, 10.1, 11.2, 12.7, 13.1));
-        try expect(approxEqAbs(
-            v,
-            f32x16(2.0, -1.0, -1.0, 2.0, 3.0, 3.0, 3.0, 5.0, 6.0, 7.0, 8.0, 9.0, 11.0, 12.0, 13.0, 14.0),
-            0.0,
-        ));
-    }
-    var v = ceil(f32x4(1.5, -1.5, -1.7, -2.1));
-    try expect(approxEqAbs(v, f32x4(2.0, -1.0, -1.0, -2.0), 0.0));
-
-    v = ceil(f32x4(-10_000_002.1, -math.inf_f32, 10_000_001.5, math.inf_f32));
-    try expect(approxEqAbs(v, f32x4(-10_000_002.1, -math.inf_f32, 10_000_001.5, math.inf_f32), 0.0));
-
-    v = ceil(f32x4(-math.qnan_f32, math.qnan_f32, math.nan_f32, -math.inf_f32));
-    try expect(math.isNan(v[0]));
-    try expect(math.isNan(v[1]));
-    try expect(math.isNan(v[2]));
-    try expect(v[3] == -math.inf_f32);
-
-    v = ceil(f32x4(1000.5001, -201.499, -10000.99, 100.75001));
-    try expect(approxEqAbs(v, f32x4(1001.0, -201.0, -10000.0, 101.0), 0.0));
-
-    v = ceil(f32x4(-1_388_609.5, 1_388_609.1, 1_388_109.9, -1_388_509.9));
-    try expect(approxEqAbs(v, f32x4(-1_388_609.0, 1_388_610.0, 1_388_110.0, -1_388_509.0), 0.0));
-
-    var f: f32 = -100.0;
-    var i: u32 = 0;
-    while (i < 100) : (i += 1) {
-        const vr = ceil(splat(F32x4, f));
-        const fr = @ceil(splat(F32x4, f));
-        const vr8 = ceil(splat(F32x8, f));
-        const fr8 = @ceil(splat(F32x8, f));
-        const vr16 = ceil(splat(F32x16, f));
-        const fr16 = @ceil(splat(F32x16, f));
-        try expect(approxEqAbs(vr, fr, 0.0));
-        try expect(approxEqAbs(vr8, fr8, 0.0));
-        try expect(approxEqAbs(vr16, fr16, 0.0));
-        f += 0.12345 * @intToFloat(f32, i);
-    }
-}
-
-pub inline fn clamp(v: anytype, vmin: anytype, vmax: anytype) @TypeOf(v, vmin, vmax) {
-    var result = max(vmin, v);
-    result = min(vmax, result);
-    return result;
-}
-test "zmath.clamp" {
-    if (builtin.target.os.tag == .macos) return error.SkipZigTest;
-    {
-        const v0 = f32x4(-1.0, 0.2, 1.1, -0.3);
-        const v = clamp(v0, splat(F32x4, -0.5), splat(F32x4, 0.5));
-        try expect(approxEqAbs(v, f32x4(-0.5, 0.2, 0.5, -0.3), 0.0001));
-    }
-    {
-        const v0 = f32x8(-2.0, 0.25, -0.25, 100.0, -1.0, 0.2, 1.1, -0.3);
-        const v = clamp(v0, splat(F32x8, -0.5), splat(F32x8, 0.5));
-        try expect(approxEqAbs(v, f32x8(-0.5, 0.25, -0.25, 0.5, -0.5, 0.2, 0.5, -0.3), 0.0001));
-    }
-    {
-        const v0 = f32x4(-math.inf_f32, math.inf_f32, math.nan_f32, math.qnan_f32);
-        const v = clamp(v0, f32x4(-100.0, 0.0, -100.0, 0.0), f32x4(0.0, 100.0, 0.0, 100.0));
-        try expect(approxEqAbs(v, f32x4(-100.0, 100.0, -100.0, 0.0), 0.0001));
-    }
-    {
-        const v0 = f32x4(math.inf_f32, math.inf_f32, -math.nan_f32, -math.qnan_f32);
-        const v = clamp(v0, splat(F32x4, -1.0), splat(F32x4, 1.0));
-        try expect(approxEqAbs(v, f32x4(1.0, 1.0, -1.0, -1.0), 0.0001));
-    }
-}
-
-pub inline fn clampFast(v: anytype, vmin: anytype, vmax: anytype) @TypeOf(v, vmin, vmax) {
-    var result = maxFast(vmin, v);
-    result = minFast(vmax, result);
-    return result;
-}
-test "zmath.clampFast" {
-    {
-        const v0 = f32x4(-1.0, 0.2, 1.1, -0.3);
-        const v = clampFast(v0, splat(F32x4, -0.5), splat(F32x4, 0.5));
-        try expect(approxEqAbs(v, f32x4(-0.5, 0.2, 0.5, -0.3), 0.0001));
-    }
-}
-
-pub inline fn saturate(v: anytype) @TypeOf(v) {
-    const T = @TypeOf(v);
-    var result = max(v, splat(T, 0.0));
-    result = min(result, splat(T, 1.0));
-    return result;
-}
-test "zmath.saturate" {
-    if (builtin.target.os.tag == .macos) return error.SkipZigTest;
-    {
-        const v0 = f32x4(-1.0, 0.2, 1.1, -0.3);
-        const v = saturate(v0);
-        try expect(approxEqAbs(v, f32x4(0.0, 0.2, 1.0, 0.0), 0.0001));
-    }
-    {
-        const v0 = f32x8(0.0, 0.0, 2.0, -2.0, -1.0, 0.2, 1.1, -0.3);
-        const v = saturate(v0);
-        try expect(approxEqAbs(v, f32x8(0.0, 0.0, 1.0, 0.0, 0.0, 0.2, 1.0, 0.0), 0.0001));
-    }
-    {
-        const v0 = f32x4(-math.inf_f32, math.inf_f32, math.nan_f32, math.qnan_f32);
-        const v = saturate(v0);
-        try expect(approxEqAbs(v, f32x4(0.0, 1.0, 0.0, 0.0), 0.0001));
-    }
-    {
-        const v0 = f32x4(math.inf_f32, math.inf_f32, -math.nan_f32, -math.qnan_f32);
-        const v = saturate(v0);
-        try expect(approxEqAbs(v, f32x4(1.0, 1.0, 0.0, 0.0), 0.0001));
-    }
-}
-
-pub inline fn saturateFast(v: anytype) @TypeOf(v) {
-    const T = @TypeOf(v);
-    var result = maxFast(v, splat(T, 0.0));
-    result = minFast(result, splat(T, 1.0));
-    return result;
-}
-test "zmath.saturateFast" {
-    {
-        const v0 = f32x4(-1.0, 0.2, 1.1, -0.3);
-        const v = saturateFast(v0);
-        try expect(approxEqAbs(v, f32x4(0.0, 0.2, 1.0, 0.0), 0.0001));
-    }
-    {
-        const v0 = f32x8(0.0, 0.0, 2.0, -2.0, -1.0, 0.2, 1.1, -0.3);
-        const v = saturateFast(v0);
-        try expect(approxEqAbs(v, f32x8(0.0, 0.0, 1.0, 0.0, 0.0, 0.2, 1.0, 0.0), 0.0001));
-    }
-    {
-        const v0 = f32x4(-math.inf_f32, math.inf_f32, math.nan_f32, math.qnan_f32);
-        const v = saturateFast(v0);
-        try expect(approxEqAbs(v, f32x4(0.0, 1.0, 0.0, 0.0), 0.0001));
-    }
-    {
-        const v0 = f32x4(math.inf_f32, math.inf_f32, -math.nan_f32, -math.qnan_f32);
-        const v = saturateFast(v0);
-        try expect(approxEqAbs(v, f32x4(1.0, 1.0, 0.0, 0.0), 0.0001));
-    }
-}
-
-pub inline fn sqrt(v: anytype) @TypeOf(v) {
-    return @sqrt(v); // sqrtps
-}
-
-pub inline fn abs(v: anytype) @TypeOf(v) {
-    return @fabs(v); // load, andps
-}
-
-pub inline fn select(mask: anytype, v0: anytype, v1: anytype) @TypeOf(v0, v1) {
-    return @select(f32, mask, v0, v1);
-}
-
-pub inline fn lerp(v0: anytype, v1: anytype, t: f32) @TypeOf(v0, v1) {
-    const T = @TypeOf(v0, v1);
-    return v0 + (v1 - v0) * splat(T, t); // subps, shufps, addps, mulps
-}
-
-pub inline fn lerpV(v0: anytype, v1: anytype, t: anytype) @TypeOf(v0, v1, t) {
-    return v0 + (v1 - v0) * t; // subps, addps, mulps
-}
-
-pub inline fn lerpInverse(v0: anytype, v1: anytype, t: anytype) @TypeOf(v0, v1) {
-    const T = @TypeOf(v0, v1);
-    return (splat(T, t) - v0) / (v1 - v0);
-}
-
-pub inline fn lerpInverseV(v0: anytype, v1: anytype, t: anytype) @TypeOf(v0, v1, t) {
-    return (t - v0) / (v1 - v0);
-}
-test "zmath.lerpInverse" {
-    try expect(math.approxEqAbs(f32, lerpInverseV(10.0, 100.0, 10.0), 0, 0.0005));
-    try expect(math.approxEqAbs(f32, lerpInverseV(10.0, 100.0, 100.0), 1, 0.0005));
-    try expect(math.approxEqAbs(f32, lerpInverseV(10.0, 100.0, 55.0), 0.5, 0.05));
-    try expect(approxEqAbs(lerpInverse(f32x4(0, 0, 10, 10), f32x4(100, 200, 100, 100), 10.0), f32x4(0.1, 0.05, 0, 0), 0.0005));
-}
-
-/// To transform a vector of values from one range to another.
-pub inline fn mapLinear(v: anytype, min1: anytype, max1: anytype, min2: anytype, max2: anytype) @TypeOf(v) {
-    const T = @TypeOf(v);
-    const min1V = splat(T, min1);
-    const max1V = splat(T, max1);
-    const min2V = splat(T, min2);
-    const max2V = splat(T, max2);
-    const dV = max1V - min1V;
-    return min2V + (v - min1V) * (max2V - min2V) / dV;
-}
-
-pub inline fn mapLinearV(v: anytype, min1: anytype, max1: anytype, min2: anytype, max2: anytype) @TypeOf(v, min1, max1, min2, max2) {
-    const d = max1 - min1;
-    return min2 + (v - min1) * (max2 - min2) / d;
-}
-test "zmath.mapLinear" {
-    try expect(math.approxEqAbs(f32, mapLinearV(0, 0, 1.2, 10, 100), 10, 0.0005));
-    try expect(math.approxEqAbs(f32, mapLinearV(1.2, 0, 1.2, 10, 100), 100, 0.0005));
-    try expect(math.approxEqAbs(f32, mapLinearV(0.6, 0, 1.2, 10, 100), 55, 0.0005));
-    try expect(approxEqAbs(mapLinearV(splat(F32x4, 0), splat(F32x4, 0), splat(F32x4, 1.2), splat(F32x4, 10), splat(F32x4, 100)), splat(F32x4, 10), 0.0005));
-    try expect(approxEqAbs(mapLinear(f32x4(0, 0, 0.6, 1.2), 0, 1.2, 10, 100), f32x4(10, 10, 55, 100), 0.0005));
-}
-
-pub const F32x4Component = enum { x, y, z, w };
-
-pub inline fn swizzle(
-    v: F32x4,
-    comptime x: F32x4Component,
-    comptime y: F32x4Component,
-    comptime z: F32x4Component,
-    comptime w: F32x4Component,
-) F32x4 {
-    return @shuffle(f32, v, undefined, [4]i32{ @enumToInt(x), @enumToInt(y), @enumToInt(z), @enumToInt(w) });
-}
-
-pub inline fn mod(v0: anytype, v1: anytype) @TypeOf(v0, v1) {
-    // vdivps, vroundps, vmulps, vsubps
-    return v0 - v1 * trunc(v0 / v1);
-}
-test "zmath.mod" {
-    if (builtin.target.os.tag == .macos and builtin.zig_backend != .stage1) return error.SkipZigTest;
-    try expect(approxEqAbs(mod(splat(F32x4, 3.1), splat(F32x4, 1.7)), splat(F32x4, 1.4), 0.0005));
-    try expect(approxEqAbs(mod(splat(F32x4, -3.0), splat(F32x4, 2.0)), splat(F32x4, -1.0), 0.0005));
-    try expect(approxEqAbs(mod(splat(F32x4, -3.0), splat(F32x4, -2.0)), splat(F32x4, -1.0), 0.0005));
-    try expect(approxEqAbs(mod(splat(F32x4, 3.0), splat(F32x4, -2.0)), splat(F32x4, 1.0), 0.0005));
-    try expect(all(isNan(mod(splat(F32x4, math.inf_f32), splat(F32x4, 1.0))), 0));
-    try expect(all(isNan(mod(splat(F32x4, -math.inf_f32), splat(F32x4, 123.456))), 0));
-    try expect(all(isNan(mod(splat(F32x4, math.nan_f32), splat(F32x4, 123.456))), 0));
-    try expect(all(isNan(mod(splat(F32x4, math.qnan_f32), splat(F32x4, 123.456))), 0));
-    try expect(all(isNan(mod(splat(F32x4, -math.qnan_f32), splat(F32x4, 123.456))), 0));
-    try expect(all(isNan(mod(splat(F32x4, 123.456), splat(F32x4, math.inf_f32))), 0));
-    try expect(all(isNan(mod(splat(F32x4, 123.456), splat(F32x4, -math.inf_f32))), 0));
-    try expect(all(isNan(mod(splat(F32x4, math.inf_f32), splat(F32x4, math.inf_f32))), 0));
-    try expect(all(isNan(mod(splat(F32x4, 123.456), splat(F32x4, math.nan_f32))), 0));
-    try expect(all(isNan(mod(splat(F32x4, math.inf_f32), splat(F32x4, math.nan_f32))), 0));
-}
-
-pub fn modAngle(v: anytype) @TypeOf(v) {
-    const T = @TypeOf(v);
-    return switch (T) {
-        f32 => modAngle32(v),
-        F32x4, F32x8, F32x16 => modAngle32xN(v),
-        else => @compileError("zmath.modAngle() not implemented for " ++ @typeName(T)),
-    };
-}
-
-pub inline fn modAngle32xN(v: anytype) @TypeOf(v) {
-    const T = @TypeOf(v);
-    return v - splat(T, math.tau) * round(v * splat(T, 1.0 / math.tau)); // 2 x vmulps, 2 x load, vroundps, vaddps
-}
-test "zmath.modAngle" {
-    try expect(approxEqAbs(modAngle(splat(F32x4, math.tau)), splat(F32x4, 0.0), 0.0005));
-    try expect(approxEqAbs(modAngle(splat(F32x4, 0.0)), splat(F32x4, 0.0), 0.0005));
-    try expect(approxEqAbs(modAngle(splat(F32x4, math.pi)), splat(F32x4, math.pi), 0.0005));
-    try expect(approxEqAbs(modAngle(splat(F32x4, 11 * math.pi)), splat(F32x4, math.pi), 0.0005));
-    try expect(approxEqAbs(modAngle(splat(F32x4, 3.5 * math.pi)), splat(F32x4, -0.5 * math.pi), 0.0005));
-    try expect(approxEqAbs(modAngle(splat(F32x4, 2.5 * math.pi)), splat(F32x4, 0.5 * math.pi), 0.0005));
-}
-
-pub inline fn mulAdd(v0: anytype, v1: anytype, v2: anytype) @TypeOf(v0, v1, v2) {
-    const T = @TypeOf(v0, v1, v2);
-    if (@import("zmath_options").enable_cross_platform_determinism) {
-        return v0 * v1 + v2; // Compiler will generate mul, add sequence (no fma even if the target supports it).
-    } else {
-        if (cpu_arch == .x86_64 and has_avx and has_fma) {
-            return @mulAdd(T, v0, v1, v2);
-        } else {
-            // NOTE(mziulek): On .x86_64 without HW fma instructions @mulAdd maps to really slow code!
-            return v0 * v1 + v2;
-        }
-    }
-}
-
-fn sin32xN(v: anytype) @TypeOf(v) {
-    // 11-degree minimax approximation
-    const T = @TypeOf(v);
-
-    var x = modAngle(v);
-    const sign = andInt(x, splatNegativeZero(T));
-    const c = orInt(sign, splat(T, math.pi));
-    const absx = andNotInt(sign, x);
-    const rflx = c - x;
-    const comp = absx <= splat(T, 0.5 * math.pi);
-    x = select(comp, x, rflx);
-    const x2 = x * x;
-
-    var result = mulAdd(splat(T, -2.3889859e-08), x2, splat(T, 2.7525562e-06));
-    result = mulAdd(result, x2, splat(T, -0.00019840874));
-    result = mulAdd(result, x2, splat(T, 0.0083333310));
-    result = mulAdd(result, x2, splat(T, -0.16666667));
-    result = mulAdd(result, x2, splat(T, 1.0));
-    return x * result;
-}
-test "zmath.sin" {
-    const epsilon = 0.0001;
-
-    try expect(approxEqAbs(sin(splat(F32x4, 0.5 * math.pi)), splat(F32x4, 1.0), epsilon));
-    try expect(approxEqAbs(sin(splat(F32x4, 0.0)), splat(F32x4, 0.0), epsilon));
-    try expect(approxEqAbs(sin(splat(F32x4, -0.0)), splat(F32x4, -0.0), epsilon));
-    try expect(approxEqAbs(sin(splat(F32x4, 89.123)), splat(F32x4, 0.916166), epsilon));
-    try expect(approxEqAbs(sin(splat(F32x8, 89.123)), splat(F32x8, 0.916166), epsilon));
-    try expect(approxEqAbs(sin(splat(F32x16, 89.123)), splat(F32x16, 0.916166), epsilon));
-    try expect(all(isNan(sin(splat(F32x4, math.inf_f32))), 0) == true);
-    try expect(all(isNan(sin(splat(F32x4, -math.inf_f32))), 0) == true);
-    try expect(all(isNan(sin(splat(F32x4, math.nan_f32))), 0) == true);
-    try expect(all(isNan(sin(splat(F32x4, math.qnan_f32))), 0) == true);
-
-    var f: f32 = -100.0;
-    var i: u32 = 0;
-    while (i < 100) : (i += 1) {
-        const vr = sin(splat(F32x4, f));
-        const fr = @sin(splat(F32x4, f));
-        const vr8 = sin(splat(F32x8, f));
-        const fr8 = @sin(splat(F32x8, f));
-        const vr16 = sin(splat(F32x16, f));
-        const fr16 = @sin(splat(F32x16, f));
-        try expect(approxEqAbs(vr, fr, epsilon));
-        try expect(approxEqAbs(vr8, fr8, epsilon));
-        try expect(approxEqAbs(vr16, fr16, epsilon));
-        f += 0.12345 * @intToFloat(f32, i);
-    }
-}
-
-fn cos32xN(v: anytype) @TypeOf(v) {
-    // 10-degree minimax approximation
-    const T = @TypeOf(v);
-
-    var x = modAngle(v);
-    var sign = andInt(x, splatNegativeZero(T));
-    const c = orInt(sign, splat(T, math.pi));
-    const absx = andNotInt(sign, x);
-    const rflx = c - x;
-    const comp = absx <= splat(T, 0.5 * math.pi);
-    x = select(comp, x, rflx);
-    sign = select(comp, splat(T, 1.0), splat(T, -1.0));
-    const x2 = x * x;
-
-    var result = mulAdd(splat(T, -2.6051615e-07), x2, splat(T, 2.4760495e-05));
-    result = mulAdd(result, x2, splat(T, -0.0013888378));
-    result = mulAdd(result, x2, splat(T, 0.041666638));
-    result = mulAdd(result, x2, splat(T, -0.5));
-    result = mulAdd(result, x2, splat(T, 1.0));
-    return sign * result;
-}
-test "zmath.cos" {
-    const epsilon = 0.0001;
-
-    try expect(approxEqAbs(cos(splat(F32x4, 0.5 * math.pi)), splat(F32x4, 0.0), epsilon));
-    try expect(approxEqAbs(cos(splat(F32x4, 0.0)), splat(F32x4, 1.0), epsilon));
-    try expect(approxEqAbs(cos(splat(F32x4, -0.0)), splat(F32x4, 1.0), epsilon));
-    try expect(all(isNan(cos(splat(F32x4, math.inf_f32))), 0) == true);
-    try expect(all(isNan(cos(splat(F32x4, -math.inf_f32))), 0) == true);
-    try expect(all(isNan(cos(splat(F32x4, math.nan_f32))), 0) == true);
-    try expect(all(isNan(cos(splat(F32x4, math.qnan_f32))), 0) == true);
-
-    var f: f32 = -100.0;
-    var i: u32 = 0;
-    while (i < 100) : (i += 1) {
-        const vr = cos(splat(F32x4, f));
-        const fr = @cos(splat(F32x4, f));
-        const vr8 = cos(splat(F32x8, f));
-        const fr8 = @cos(splat(F32x8, f));
-        const vr16 = cos(splat(F32x16, f));
-        const fr16 = @cos(splat(F32x16, f));
-        try expect(approxEqAbs(vr, fr, epsilon));
-        try expect(approxEqAbs(vr8, fr8, epsilon));
-        try expect(approxEqAbs(vr16, fr16, epsilon));
-        f += 0.12345 * @intToFloat(f32, i);
-    }
-}
-
-pub fn sin(v: anytype) @TypeOf(v) {
-    const T = @TypeOf(v);
-    return switch (T) {
-        f32 => sin32(v),
-        F32x4, F32x8, F32x16 => sin32xN(v),
-        else => @compileError("zmath.sin() not implemented for " ++ @typeName(T)),
-    };
-}
-
-pub fn cos(v: anytype) @TypeOf(v) {
-    const T = @TypeOf(v);
-    return switch (T) {
-        f32 => cos32(v),
-        F32x4, F32x8, F32x16 => cos32xN(v),
-        else => @compileError("zmath.cos() not implemented for " ++ @typeName(T)),
-    };
-}
-
-pub fn sincos(v: anytype) [2]@TypeOf(v) {
-    const T = @TypeOf(v);
-    return switch (T) {
-        f32 => sincos32(v),
-        F32x4, F32x8, F32x16 => sincos32xN(v),
-        else => @compileError("zmath.sincos() not implemented for " ++ @typeName(T)),
-    };
-}
-
-pub fn asin(v: anytype) @TypeOf(v) {
-    const T = @TypeOf(v);
-    return switch (T) {
-        f32 => asin32(v),
-        F32x4, F32x8, F32x16 => asin32xN(v),
-        else => @compileError("zmath.asin() not implemented for " ++ @typeName(T)),
-    };
-}
-
-pub fn acos(v: anytype) @TypeOf(v) {
-    const T = @TypeOf(v);
-    return switch (T) {
-        f32 => acos32(v),
-        F32x4, F32x8, F32x16 => acos32xN(v),
-        else => @compileError("zmath.acos() not implemented for " ++ @typeName(T)),
-    };
-}
-
-fn sincos32xN(v: anytype) [2]@TypeOf(v) {
-    const T = @TypeOf(v);
-
-    var x = modAngle(v);
-    var sign = andInt(x, splatNegativeZero(T));
-    const c = orInt(sign, splat(T, math.pi));
-    const absx = andNotInt(sign, x);
-    const rflx = c - x;
-    const comp = absx <= splat(T, 0.5 * math.pi);
-    x = select(comp, x, rflx);
-    sign = select(comp, splat(T, 1.0), splat(T, -1.0));
-    const x2 = x * x;
-
-    var sresult = mulAdd(splat(T, -2.3889859e-08), x2, splat(T, 2.7525562e-06));
-    sresult = mulAdd(sresult, x2, splat(T, -0.00019840874));
-    sresult = mulAdd(sresult, x2, splat(T, 0.0083333310));
-    sresult = mulAdd(sresult, x2, splat(T, -0.16666667));
-    sresult = x * mulAdd(sresult, x2, splat(T, 1.0));
-
-    var cresult = mulAdd(splat(T, -2.6051615e-07), x2, splat(T, 2.4760495e-05));
-    cresult = mulAdd(cresult, x2, splat(T, -0.0013888378));
-    cresult = mulAdd(cresult, x2, splat(T, 0.041666638));
-    cresult = mulAdd(cresult, x2, splat(T, -0.5));
-    cresult = sign * mulAdd(cresult, x2, splat(T, 1.0));
-
-    return .{ sresult, cresult };
-}
-test "zmath.sincos32xN" {
-    const epsilon = 0.0001;
-
-    var f: f32 = -100.0;
-    var i: u32 = 0;
-    while (i < 100) : (i += 1) {
-        const sc = sincos(splat(F32x4, f));
-        const sc8 = sincos(splat(F32x8, f));
-        const sc16 = sincos(splat(F32x16, f));
-        const s4 = @sin(splat(F32x4, f));
-        const s8 = @sin(splat(F32x8, f));
-        const s16 = @sin(splat(F32x16, f));
-        const c4 = @cos(splat(F32x4, f));
-        const c8 = @cos(splat(F32x8, f));
-        const c16 = @cos(splat(F32x16, f));
-        try expect(approxEqAbs(sc[0], s4, epsilon));
-        try expect(approxEqAbs(sc8[0], s8, epsilon));
-        try expect(approxEqAbs(sc16[0], s16, epsilon));
-        try expect(approxEqAbs(sc[1], c4, epsilon));
-        try expect(approxEqAbs(sc8[1], c8, epsilon));
-        try expect(approxEqAbs(sc16[1], c16, epsilon));
-        f += 0.12345 * @intToFloat(f32, i);
-    }
-}
-
-fn asin32xN(v: anytype) @TypeOf(v) {
-    // 7-degree minimax approximation
-    const T = @TypeOf(v);
-
-    const x = abs(v);
-    const root = sqrt(maxFast(splat(T, 0.0), splat(T, 1.0) - x));
-
-    var t0 = mulAdd(splat(T, -0.0012624911), x, splat(T, 0.0066700901));
-    t0 = mulAdd(t0, x, splat(T, -0.0170881256));
-    t0 = mulAdd(t0, x, splat(T, 0.0308918810));
-    t0 = mulAdd(t0, x, splat(T, -0.0501743046));
-    t0 = mulAdd(t0, x, splat(T, 0.0889789874));
-    t0 = mulAdd(t0, x, splat(T, -0.2145988016));
-    t0 = root * mulAdd(t0, x, splat(T, 1.5707963050));
-
-    const t1 = splat(T, math.pi) - t0;
-    return splat(T, 0.5 * math.pi) - select(v >= splat(T, 0.0), t0, t1);
-}
-
-fn acos32xN(v: anytype) @TypeOf(v) {
-    // 7-degree minimax approximation
-    const T = @TypeOf(v);
-
-    const x = abs(v);
-    const root = sqrt(maxFast(splat(T, 0.0), splat(T, 1.0) - x));
-
-    var t0 = mulAdd(splat(T, -0.0012624911), x, splat(T, 0.0066700901));
-    t0 = mulAdd(t0, x, splat(T, -0.0170881256));
-    t0 = mulAdd(t0, x, splat(T, 0.0308918810));
-    t0 = mulAdd(t0, x, splat(T, -0.0501743046));
-    t0 = mulAdd(t0, x, splat(T, 0.0889789874));
-    t0 = mulAdd(t0, x, splat(T, -0.2145988016));
-    t0 = root * mulAdd(t0, x, splat(T, 1.5707963050));
-
-    const t1 = splat(T, math.pi) - t0;
-    return select(v >= splat(T, 0.0), t0, t1);
-}
-
-pub fn atan(v: anytype) @TypeOf(v) {
-    // 17-degree minimax approximation
-    const T = @TypeOf(v);
-
-    const vabs = abs(v);
-    const vinv = splat(T, 1.0) / v;
-    var sign = select(v > splat(T, 1.0), splat(T, 1.0), splat(T, -1.0));
-    const comp = vabs <= splat(T, 1.0);
-    sign = select(comp, splat(T, 0.0), sign);
-    const x = select(comp, v, vinv);
-    const x2 = x * x;
-
-    var result = mulAdd(splat(T, 0.0028662257), x2, splat(T, -0.0161657367));
-    result = mulAdd(result, x2, splat(T, 0.0429096138));
-    result = mulAdd(result, x2, splat(T, -0.0752896400));
-    result = mulAdd(result, x2, splat(T, 0.1065626393));
-    result = mulAdd(result, x2, splat(T, -0.1420889944));
-    result = mulAdd(result, x2, splat(T, 0.1999355085));
-    result = mulAdd(result, x2, splat(T, -0.3333314528));
-    result = x * mulAdd(result, x2, splat(T, 1.0));
-
-    const result1 = sign * splat(T, 0.5 * math.pi) - result;
-    return select(sign == splat(T, 0.0), result, result1);
-}
-test "zmath.atan" {
-    const epsilon = 0.0001;
-    {
-        const v = f32x4(0.25, 0.5, 1.0, 1.25);
-        const e = f32x4(math.atan(v[0]), math.atan(v[1]), math.atan(v[2]), math.atan(v[3]));
-        try expect(approxEqAbs(e, atan(v), epsilon));
-    }
-    {
-        const v = f32x8(-0.25, 0.5, -1.0, 1.25, 100.0, -200.0, 300.0, 400.0);
-        // zig fmt: off
-        const e = f32x8(
-            math.atan(v[0]), math.atan(v[1]), math.atan(v[2]), math.atan(v[3]),
-            math.atan(v[4]), math.atan(v[5]), math.atan(v[6]), math.atan(v[7]),
-        );
-        // zig fmt: on
-        try expect(approxEqAbs(e, atan(v), epsilon));
-    }
-    {
-        // zig fmt: off
-        const v = f32x16(
-            -0.25, 0.5, -1.0, 0.0, 0.1, -0.2, 30.0, 400.0,
-            -0.25, 0.5, -1.0, -0.0, -0.05, -0.125, 0.0625, 4000.0
-        );
-        const e = f32x16(
-            math.atan(v[0]), math.atan(v[1]), math.atan(v[2]), math.atan(v[3]),
-            math.atan(v[4]), math.atan(v[5]), math.atan(v[6]), math.atan(v[7]),
-            math.atan(v[8]), math.atan(v[9]), math.atan(v[10]), math.atan(v[11]),
-            math.atan(v[12]), math.atan(v[13]), math.atan(v[14]), math.atan(v[15]),
-        );
-        // zig fmt: on
-        try expect(approxEqAbs(e, atan(v), epsilon));
-    }
-    {
-        try expect(approxEqAbs(atan(splat(F32x4, math.inf_f32)), splat(F32x4, 0.5 * math.pi), epsilon));
-        try expect(approxEqAbs(atan(splat(F32x4, -math.inf_f32)), splat(F32x4, -0.5 * math.pi), epsilon));
-        try expect(all(isNan(atan(splat(F32x4, math.nan_f32))), 0) == true);
-        try expect(all(isNan(atan(splat(F32x4, -math.nan_f32))), 0) == true);
-    }
-}
-
-pub fn atan2(vy: anytype, vx: anytype) @TypeOf(vx, vy) {
-    const T = @TypeOf(vx, vy);
-    const Tu = @Vector(veclen(T), u32);
-
-    const vx_is_positive =
-        (@bitCast(Tu, vx) & @splat(veclen(T), @as(u32, 0x8000_0000))) == @splat(veclen(T), @as(u32, 0));
-
-    const vy_sign = andInt(vy, splatNegativeZero(T));
-    const c0_25pi = orInt(vy_sign, splat(T, 0.25 * math.pi));
-    const c0_50pi = orInt(vy_sign, splat(T, 0.50 * math.pi));
-    const c0_75pi = orInt(vy_sign, splat(T, 0.75 * math.pi));
-    const c1_00pi = orInt(vy_sign, splat(T, 1.00 * math.pi));
-
-    var r1 = select(vx_is_positive, vy_sign, c1_00pi);
-    var r2 = select(vx == splat(T, 0.0), c0_50pi, splatInt(T, 0xffff_ffff));
-    const r3 = select(vy == splat(T, 0.0), r1, r2);
-    const r4 = select(vx_is_positive, c0_25pi, c0_75pi);
-    const r5 = select(isInf(vx), r4, c0_50pi);
-    const result = select(isInf(vy), r5, r3);
-    const result_valid = @bitCast(Tu, result) == @splat(veclen(T), @as(u32, 0xffff_ffff));
-
-    const v = vy / vx;
-    const r0 = atan(v);
-
-    r1 = select(vx_is_positive, splatNegativeZero(T), c1_00pi);
-    r2 = r0 + r1;
-
-    return select(result_valid, r2, result);
-}
-test "zmath.atan2" {
-    // From DirectXMath XMVectorATan2():
-    //
-    // Return the inverse tangent of Y / X in the range of -Pi to Pi with the following exceptions:
-
-    //     Y == 0 and X is Negative         -> Pi with the sign of Y
-    //     y == 0 and x is positive         -> 0 with the sign of y
-    //     Y != 0 and X == 0                -> Pi / 2 with the sign of Y
-    //     Y != 0 and X is Negative         -> atan(y/x) + (PI with the sign of Y)
-    //     X == -Infinity and Finite Y      -> Pi with the sign of Y
-    //     X == +Infinity and Finite Y      -> 0 with the sign of Y
-    //     Y == Infinity and X is Finite    -> Pi / 2 with the sign of Y
-    //     Y == Infinity and X == -Infinity -> 3Pi / 4 with the sign of Y
-    //     Y == Infinity and X == +Infinity -> Pi / 4 with the sign of Y
-
-    const epsilon = 0.0001;
-    try expect(approxEqAbs(atan2(splat(F32x4, 0.0), splat(F32x4, -1.0)), splat(F32x4, math.pi), epsilon));
-    try expect(approxEqAbs(atan2(splat(F32x4, -0.0), splat(F32x4, -1.0)), splat(F32x4, -math.pi), epsilon));
-    try expect(approxEqAbs(atan2(splat(F32x4, 1.0), splat(F32x4, 0.0)), splat(F32x4, 0.5 * math.pi), epsilon));
-    try expect(approxEqAbs(atan2(splat(F32x4, -1.0), splat(F32x4, 0.0)), splat(F32x4, -0.5 * math.pi), epsilon));
-    try expect(approxEqAbs(
-        atan2(splat(F32x4, 1.0), splat(F32x4, -1.0)),
-        splat(F32x4, math.atan(@as(f32, -1.0)) + math.pi),
-        epsilon,
-    ));
-    try expect(approxEqAbs(
-        atan2(splat(F32x4, -10.0), splat(F32x4, -2.0)),
-        splat(F32x4, math.atan(@as(f32, 5.0)) - math.pi),
-        epsilon,
-    ));
-    try expect(approxEqAbs(atan2(splat(F32x4, 1.0), splat(F32x4, -math.inf_f32)), splat(F32x4, math.pi), epsilon));
-    try expect(approxEqAbs(atan2(splat(F32x4, -1.0), splat(F32x4, -math.inf_f32)), splat(F32x4, -math.pi), epsilon));
-    try expect(approxEqAbs(atan2(splat(F32x4, 1.0), splat(F32x4, math.inf_f32)), splat(F32x4, 0.0), epsilon));
-    try expect(approxEqAbs(atan2(splat(F32x4, -1.0), splat(F32x4, math.inf_f32)), splat(F32x4, -0.0), epsilon));
-    try expect(approxEqAbs(
-        atan2(splat(F32x4, math.inf_f32), splat(F32x4, 2.0)),
-        splat(F32x4, 0.5 * math.pi),
-        epsilon,
-    ));
-    try expect(approxEqAbs(
-        atan2(splat(F32x4, -math.inf_f32), splat(F32x4, 2.0)),
-        splat(F32x4, -0.5 * math.pi),
-        epsilon,
-    ));
-    try expect(approxEqAbs(
-        atan2(splat(F32x4, math.inf_f32), splat(F32x4, -math.inf_f32)),
-        splat(F32x4, 0.75 * math.pi),
-        epsilon,
-    ));
-    try expect(approxEqAbs(
-        atan2(splat(F32x4, -math.inf_f32), splat(F32x4, -math.inf_f32)),
-        splat(F32x4, -0.75 * math.pi),
-        epsilon,
-    ));
-    try expect(approxEqAbs(
-        atan2(splat(F32x4, math.inf_f32), splat(F32x4, math.inf_f32)),
-        splat(F32x4, 0.25 * math.pi),
-        epsilon,
-    ));
-    try expect(approxEqAbs(
-        atan2(splat(F32x4, -math.inf_f32), splat(F32x4, math.inf_f32)),
-        splat(F32x4, -0.25 * math.pi),
-        epsilon,
-    ));
-    try expect(approxEqAbs(
-        atan2(
-            f32x8(0.0, -math.inf_f32, -0.0, 2.0, math.inf_f32, math.inf_f32, 1.0, -math.inf_f32),
-            f32x8(-2.0, math.inf_f32, 1.0, 0.0, 10.0, -math.inf_f32, 1.0, -math.inf_f32),
-        ),
-        f32x8(
-            math.pi,
-            -0.25 * math.pi,
-            -0.0,
-            0.5 * math.pi,
-            0.5 * math.pi,
-            0.75 * math.pi,
-            math.atan(@as(f32, 1.0)),
-            -0.75 * math.pi,
-        ),
-        epsilon,
-    ));
-    try expect(approxEqAbs(atan2(splat(F32x4, 0.0), splat(F32x4, 0.0)), splat(F32x4, 0.0), epsilon));
-    try expect(approxEqAbs(atan2(splat(F32x4, -0.0), splat(F32x4, 0.0)), splat(F32x4, 0.0), epsilon));
-    try expect(all(isNan(atan2(splat(F32x4, 1.0), splat(F32x4, math.nan_f32))), 0) == true);
-    try expect(all(isNan(atan2(splat(F32x4, -1.0), splat(F32x4, math.nan_f32))), 0) == true);
-    try expect(all(isNan(atan2(splat(F32x4, math.nan_f32), splat(F32x4, -1.0))), 0) == true);
-    try expect(all(isNan(atan2(splat(F32x4, -math.nan_f32), splat(F32x4, 1.0))), 0) == true);
-}
-// ------------------------------------------------------------------------------
-//
-// 3. 2D, 3D, 4D vector functions
-//
-// ------------------------------------------------------------------------------
-pub inline fn dot2(v0: Vec, v1: Vec) F32x4 {
-    var xmm0 = v0 * v1; // | x0*x1 | y0*y1 | -- | -- |
-    var xmm1 = swizzle(xmm0, .y, .x, .x, .x); // | y0*y1 | -- | -- | -- |
-    xmm0 = f32x4(xmm0[0] + xmm1[0], xmm0[1], xmm0[2], xmm0[3]); // | x0*x1 + y0*y1 | -- | -- | -- |
-    return swizzle(xmm0, .x, .x, .x, .x);
-}
-test "zmath.dot2" {
-    const v0 = f32x4(-1.0, 2.0, 300.0, -2.0);
-    const v1 = f32x4(4.0, 5.0, 600.0, 2.0);
-    var v = dot2(v0, v1);
-    try expect(approxEqAbs(v, splat(F32x4, 6.0), 0.0001));
-}
-
-pub inline fn dot3(v0: Vec, v1: Vec) F32x4 {
-    const dot = v0 * v1;
-    return f32x4s(dot[0] + dot[1] + dot[2]);
-}
-test "zmath.dot3" {
-    const v0 = f32x4(-1.0, 2.0, 3.0, 1.0);
-    const v1 = f32x4(4.0, 5.0, 6.0, 1.0);
-    var v = dot3(v0, v1);
-    try expect(approxEqAbs(v, splat(F32x4, 24.0), 0.0001));
-}
-
-pub inline fn dot4(v0: Vec, v1: Vec) F32x4 {
-    var xmm0 = v0 * v1; // | x0*x1 | y0*y1 | z0*z1 | w0*w1 |
-    var xmm1 = swizzle(xmm0, .y, .x, .w, .x); // | y0*y1 | -- | w0*w1 | -- |
-    xmm1 = xmm0 + xmm1; // | x0*x1 + y0*y1 | -- | z0*z1 + w0*w1 | -- |
-    xmm0 = swizzle(xmm1, .z, .x, .x, .x); // | z0*z1 + w0*w1 | -- | -- | -- |
-    xmm0 = f32x4(xmm0[0] + xmm1[0], xmm0[1], xmm0[2], xmm0[2]); // addss
-    return swizzle(xmm0, .x, .x, .x, .x);
-}
-test "zmath.dot4" {
-    const v0 = f32x4(-1.0, 2.0, 3.0, -2.0);
-    const v1 = f32x4(4.0, 5.0, 6.0, 2.0);
-    var v = dot4(v0, v1);
-    try expect(approxEqAbs(v, splat(F32x4, 20.0), 0.0001));
-}
-
-pub inline fn cross3(v0: Vec, v1: Vec) Vec {
-    var xmm0 = swizzle(v0, .y, .z, .x, .w);
-    var xmm1 = swizzle(v1, .z, .x, .y, .w);
-    var result = xmm0 * xmm1;
-    xmm0 = swizzle(xmm0, .y, .z, .x, .w);
-    xmm1 = swizzle(xmm1, .z, .x, .y, .w);
-    result = result - xmm0 * xmm1;
-    return andInt(result, f32x4_mask3);
-}
-test "zmath.cross3" {
-    {
-        const v0 = f32x4(1.0, 0.0, 0.0, 1.0);
-        const v1 = f32x4(0.0, 1.0, 0.0, 1.0);
-        var v = cross3(v0, v1);
-        try expect(approxEqAbs(v, f32x4(0.0, 0.0, 1.0, 0.0), 0.0001));
-    }
-    {
-        const v0 = f32x4(1.0, 0.0, 0.0, 1.0);
-        const v1 = f32x4(0.0, -1.0, 0.0, 1.0);
-        var v = cross3(v0, v1);
-        try expect(approxEqAbs(v, f32x4(0.0, 0.0, -1.0, 0.0), 0.0001));
-    }
-    {
-        const v0 = f32x4(-3.0, 0, -2.0, 1.0);
-        const v1 = f32x4(5.0, -1.0, 2.0, 1.0);
-        var v = cross3(v0, v1);
-        try expect(approxEqAbs(v, f32x4(-2.0, -4.0, 3.0, 0.0), 0.0001));
-    }
-}
-
-pub inline fn lengthSq2(v: Vec) F32x4 {
-    return dot2(v, v);
-}
-pub inline fn lengthSq3(v: Vec) F32x4 {
-    return dot3(v, v);
-}
-pub inline fn lengthSq4(v: Vec) F32x4 {
-    return dot4(v, v);
-}
-
-pub inline fn length2(v: Vec) F32x4 {
-    return sqrt(dot2(v, v));
-}
-pub inline fn length3(v: Vec) F32x4 {
-    return sqrt(dot3(v, v));
-}
-pub inline fn length4(v: Vec) F32x4 {
-    return sqrt(dot4(v, v));
-}
-test "zmath.length3" {
-    if (builtin.target.os.tag == .macos and builtin.zig_backend != .stage1) return error.SkipZigTest;
-    {
-        const v = length3(f32x4(1.0, -2.0, 3.0, 1000.0));
-        try expect(approxEqAbs(v, splat(F32x4, math.sqrt(14.0)), 0.001));
-    }
-    {
-        const v = length3(f32x4(1.0, math.nan_f32, math.nan_f32, 1000.0));
-        try expect(all(isNan(v), 0));
-    }
-    {
-        const v = length3(f32x4(1.0, math.inf_f32, 3.0, 1000.0));
-        try expect(all(isInf(v), 0));
-    }
-    {
-        const v = length3(f32x4(3.0, 2.0, 1.0, math.nan_f32));
-        try expect(approxEqAbs(v, splat(F32x4, math.sqrt(14.0)), 0.001));
-    }
-}
-
-pub inline fn normalize2(v: Vec) Vec {
-    return v * splat(F32x4, 1.0) / sqrt(dot2(v, v));
-}
-pub inline fn normalize3(v: Vec) Vec {
-    return v * splat(F32x4, 1.0) / sqrt(dot3(v, v));
-}
-pub inline fn normalize4(v: Vec) Vec {
-    return v * splat(F32x4, 1.0) / sqrt(dot4(v, v));
-}
-test "zmath.normalize3" {
-    {
-        const v0 = f32x4(1.0, -2.0, 3.0, 1000.0);
-        var v = normalize3(v0);
-        try expect(approxEqAbs(v, v0 * splat(F32x4, 1.0 / math.sqrt(14.0)), 0.0005));
-    }
-    {
-        try expect(any(isNan(normalize3(f32x4(1.0, math.inf_f32, 1.0, 1.0))), 0));
-        try expect(any(isNan(normalize3(f32x4(-math.inf_f32, math.inf_f32, 0.0, 0.0))), 0));
-        try expect(any(isNan(normalize3(f32x4(-math.nan_f32, math.qnan_f32, 0.0, 0.0))), 0));
-        try expect(any(isNan(normalize3(f32x4(0, 0, 0, 0))), 0));
-    }
-}
-test "zmath.normalize4" {
-    {
-        const v0 = f32x4(1.0, -2.0, 3.0, 10.0);
-        var v = normalize4(v0);
-        try expect(approxEqAbs(v, v0 * splat(F32x4, 1.0 / math.sqrt(114.0)), 0.0005));
-    }
-    {
-        try expect(any(isNan(normalize4(f32x4(1.0, math.inf_f32, 1.0, 1.0))), 0));
-        try expect(any(isNan(normalize4(f32x4(-math.inf_f32, math.inf_f32, 0.0, 0.0))), 0));
-        try expect(any(isNan(normalize4(f32x4(-math.nan_f32, math.qnan_f32, 0.0, 0.0))), 0));
-        try expect(any(isNan(normalize4(f32x4(0, 0, 0, 0))), 0));
-    }
-}
-
-fn vecMulMat(v: Vec, m: Mat) Vec {
-    var vx = @shuffle(f32, v, undefined, [4]i32{ 0, 0, 0, 0 });
-    var vy = @shuffle(f32, v, undefined, [4]i32{ 1, 1, 1, 1 });
-    var vz = @shuffle(f32, v, undefined, [4]i32{ 2, 2, 2, 2 });
-    var vw = @shuffle(f32, v, undefined, [4]i32{ 3, 3, 3, 3 });
-    return vx * m[0] + vy * m[1] + vz * m[2] + vw * m[3];
-}
-fn matMulVec(m: Mat, v: Vec) Vec {
-    return .{ dot4(m[0], v)[0], dot4(m[1], v)[0], dot4(m[2], v)[0], dot4(m[3], v)[0] };
-}
-test "zmath.vecMulMat" {
-    const m = Mat{
-        f32x4(1.0, 0.0, 0.0, 0.0),
-        f32x4(0.0, 1.0, 0.0, 0.0),
-        f32x4(0.0, 0.0, 1.0, 0.0),
-        f32x4(2.0, 3.0, 4.0, 1.0),
-    };
-    const vm = mul(f32x4(1.0, 2.0, 3.0, 1.0), m);
-    const mv = mul(m, f32x4(1.0, 2.0, 3.0, 1.0));
-    const v = mul(transpose(m), f32x4(1.0, 2.0, 3.0, 1.0));
-    try expect(approxEqAbs(vm, f32x4(3.0, 5.0, 7.0, 1.0), 0.0001));
-    try expect(approxEqAbs(mv, f32x4(1.0, 2.0, 3.0, 21.0), 0.0001));
-    try expect(approxEqAbs(v, f32x4(3.0, 5.0, 7.0, 1.0), 0.0001));
-}
-// ------------------------------------------------------------------------------
-//
-// 4. Matrix functions
-//
-// ------------------------------------------------------------------------------
-pub fn identity() Mat {
-    const static = struct {
-        const identity = Mat{
-            f32x4(1.0, 0.0, 0.0, 0.0),
-            f32x4(0.0, 1.0, 0.0, 0.0),
-            f32x4(0.0, 0.0, 1.0, 0.0),
-            f32x4(0.0, 0.0, 0.0, 1.0),
-        };
-    };
-    return static.identity;
-}
-
-fn mulRetType(comptime Ta: type, comptime Tb: type) type {
-    if (Ta == Mat and Tb == Mat) {
-        return Mat;
-    } else if ((Ta == f32 and Tb == Mat) or (Ta == Mat and Tb == f32)) {
-        return Mat;
-    } else if ((Ta == Vec and Tb == Mat) or (Ta == Mat and Tb == Vec)) {
-        return Vec;
-    }
-    @compileError("zmath.mul() not implemented for types: " ++ @typeName(Ta) ++ @typeName(Tb));
-}
-
-pub fn mul(a: anytype, b: anytype) mulRetType(@TypeOf(a), @TypeOf(b)) {
-    const Ta = @TypeOf(a);
-    const Tb = @TypeOf(b);
-    if (Ta == Mat and Tb == Mat) {
-        return mulMat(a, b);
-    } else if (Ta == f32 and Tb == Mat) {
-        const va = splat(F32x4, a);
-        return Mat{ va * b[0], va * b[1], va * b[2], va * b[3] };
-    } else if (Ta == Mat and Tb == f32) {
-        const vb = splat(F32x4, b);
-        return Mat{ a[0] * vb, a[1] * vb, a[2] * vb, a[3] * vb };
-    } else if (Ta == Vec and Tb == Mat) {
-        return vecMulMat(a, b);
-    } else if (Ta == Mat and Tb == Vec) {
-        return matMulVec(a, b);
-    } else {
-        @compileError("zmath.mul() not implemented for types: " ++ @typeName(Ta) ++ ", " ++ @typeName(Tb));
-    }
-}
-test "zmath.mul" {
-    {
-        const m = Mat{
-            f32x4(0.1, 0.2, 0.3, 0.4),
-            f32x4(0.5, 0.6, 0.7, 0.8),
-            f32x4(0.9, 1.0, 1.1, 1.2),
-            f32x4(1.3, 1.4, 1.5, 1.6),
-        };
-        const ms = mul(@as(f32, 2.0), m);
-        try expect(approxEqAbs(ms[0], f32x4(0.2, 0.4, 0.6, 0.8), 0.0001));
-        try expect(approxEqAbs(ms[1], f32x4(1.0, 1.2, 1.4, 1.6), 0.0001));
-        try expect(approxEqAbs(ms[2], f32x4(1.8, 2.0, 2.2, 2.4), 0.0001));
-        try expect(approxEqAbs(ms[3], f32x4(2.6, 2.8, 3.0, 3.2), 0.0001));
-    }
-}
-
-fn mulMat(m0: Mat, m1: Mat) Mat {
-    var result: Mat = undefined;
-    comptime var row: u32 = 0;
-    inline while (row < 4) : (row += 1) {
-        const vx = swizzle(m0[row], .x, .x, .x, .x);
-        const vy = swizzle(m0[row], .y, .y, .y, .y);
-        const vz = swizzle(m0[row], .z, .z, .z, .z);
-        const vw = swizzle(m0[row], .w, .w, .w, .w);
-        result[row] = mulAdd(vx, m1[0], vz * m1[2]) + mulAdd(vy, m1[1], vw * m1[3]);
-    }
-    return result;
-}
-test "zmath.matrix.mul" {
-    const a = Mat{
-        f32x4(0.1, 0.2, 0.3, 0.4),
-        f32x4(0.5, 0.6, 0.7, 0.8),
-        f32x4(0.9, 1.0, 1.1, 1.2),
-        f32x4(1.3, 1.4, 1.5, 1.6),
-    };
-    const b = Mat{
-        f32x4(1.7, 1.8, 1.9, 2.0),
-        f32x4(2.1, 2.2, 2.3, 2.4),
-        f32x4(2.5, 2.6, 2.7, 2.8),
-        f32x4(2.9, 3.0, 3.1, 3.2),
-    };
-    const c = mul(a, b);
-    try expect(approxEqAbs(c[0], f32x4(2.5, 2.6, 2.7, 2.8), 0.0001));
-    try expect(approxEqAbs(c[1], f32x4(6.18, 6.44, 6.7, 6.96), 0.0001));
-    try expect(approxEqAbs(c[2], f32x4(9.86, 10.28, 10.7, 11.12), 0.0001));
-    try expect(approxEqAbs(c[3], f32x4(13.54, 14.12, 14.7, 15.28), 0.0001));
-}
-
-pub fn transpose(m: Mat) Mat {
-    const temp1 = @shuffle(f32, m[0], m[1], [4]i32{ 0, 1, ~@as(i32, 0), ~@as(i32, 1) });
-    const temp3 = @shuffle(f32, m[0], m[1], [4]i32{ 2, 3, ~@as(i32, 2), ~@as(i32, 3) });
-    const temp2 = @shuffle(f32, m[2], m[3], [4]i32{ 0, 1, ~@as(i32, 0), ~@as(i32, 1) });
-    const temp4 = @shuffle(f32, m[2], m[3], [4]i32{ 2, 3, ~@as(i32, 2), ~@as(i32, 3) });
-    return .{
-        @shuffle(f32, temp1, temp2, [4]i32{ 0, 2, ~@as(i32, 0), ~@as(i32, 2) }),
-        @shuffle(f32, temp1, temp2, [4]i32{ 1, 3, ~@as(i32, 1), ~@as(i32, 3) }),
-        @shuffle(f32, temp3, temp4, [4]i32{ 0, 2, ~@as(i32, 0), ~@as(i32, 2) }),
-        @shuffle(f32, temp3, temp4, [4]i32{ 1, 3, ~@as(i32, 1), ~@as(i32, 3) }),
-    };
-}
-test "zmath.matrix.transpose" {
-    const m = Mat{
-        f32x4(1.0, 2.0, 3.0, 4.0),
-        f32x4(5.0, 6.0, 7.0, 8.0),
-        f32x4(9.0, 10.0, 11.0, 12.0),
-        f32x4(13.0, 14.0, 15.0, 16.0),
-    };
-    const mt = transpose(m);
-    try expect(approxEqAbs(mt[0], f32x4(1.0, 5.0, 9.0, 13.0), 0.0001));
-    try expect(approxEqAbs(mt[1], f32x4(2.0, 6.0, 10.0, 14.0), 0.0001));
-    try expect(approxEqAbs(mt[2], f32x4(3.0, 7.0, 11.0, 15.0), 0.0001));
-    try expect(approxEqAbs(mt[3], f32x4(4.0, 8.0, 12.0, 16.0), 0.0001));
-}
-
-pub fn rotationX(angle: f32) Mat {
-    const sc = sincos(angle);
-    return .{
-        f32x4(1.0, 0.0, 0.0, 0.0),
-        f32x4(0.0, sc[1], sc[0], 0.0),
-        f32x4(0.0, -sc[0], sc[1], 0.0),
-        f32x4(0.0, 0.0, 0.0, 1.0),
-    };
-}
-
-pub fn rotationY(angle: f32) Mat {
-    const sc = sincos(angle);
-    return .{
-        f32x4(sc[1], 0.0, -sc[0], 0.0),
-        f32x4(0.0, 1.0, 0.0, 0.0),
-        f32x4(sc[0], 0.0, sc[1], 0.0),
-        f32x4(0.0, 0.0, 0.0, 1.0),
-    };
-}
-
-pub fn rotationZ(angle: f32) Mat {
-    const sc = sincos(angle);
-    return .{
-        f32x4(sc[1], sc[0], 0.0, 0.0),
-        f32x4(-sc[0], sc[1], 0.0, 0.0),
-        f32x4(0.0, 0.0, 1.0, 0.0),
-        f32x4(0.0, 0.0, 0.0, 1.0),
-    };
-}
-
-pub fn translation(x: f32, y: f32, z: f32) Mat {
-    return .{
-        f32x4(1.0, 0.0, 0.0, 0.0),
-        f32x4(0.0, 1.0, 0.0, 0.0),
-        f32x4(0.0, 0.0, 1.0, 0.0),
-        f32x4(x, y, z, 1.0),
-    };
-}
-pub fn translationV(v: Vec) Mat {
-    return translation(v[0], v[1], v[2]);
-}
-
-pub fn scaling(x: f32, y: f32, z: f32) Mat {
-    return .{
-        f32x4(x, 0.0, 0.0, 0.0),
-        f32x4(0.0, y, 0.0, 0.0),
-        f32x4(0.0, 0.0, z, 0.0),
-        f32x4(0.0, 0.0, 0.0, 1.0),
-    };
-}
-pub fn scalingV(v: Vec) Mat {
-    return scaling(v[0], v[1], v[2]);
-}
-
-pub fn lookToLh(eyepos: Vec, eyedir: Vec, updir: Vec) Mat {
-    const az = normalize3(eyedir);
-    const ax = normalize3(cross3(updir, az));
-    const ay = normalize3(cross3(az, ax));
-    return transpose(.{
-        f32x4(ax[0], ax[1], ax[2], -dot3(ax, eyepos)[0]),
-        f32x4(ay[0], ay[1], ay[2], -dot3(ay, eyepos)[0]),
-        f32x4(az[0], az[1], az[2], -dot3(az, eyepos)[0]),
-        f32x4(0.0, 0.0, 0.0, 1.0),
-    });
-}
-pub fn lookToRh(eyepos: Vec, eyedir: Vec, updir: Vec) Mat {
-    return lookToLh(eyepos, -eyedir, updir);
-}
-pub fn lookAtLh(eyepos: Vec, focuspos: Vec, updir: Vec) Mat {
-    return lookToLh(eyepos, focuspos - eyepos, updir);
-}
-pub fn lookAtRh(eyepos: Vec, focuspos: Vec, updir: Vec) Mat {
-    return lookToLh(eyepos, eyepos - focuspos, updir);
-}
-test "zmath.matrix.lookToLh" {
-    const m = lookToLh(f32x4(0.0, 0.0, -3.0, 1.0), f32x4(0.0, 0.0, 1.0, 0.0), f32x4(0.0, 1.0, 0.0, 0.0));
-    try expect(approxEqAbs(m[0], f32x4(1.0, 0.0, 0.0, 0.0), 0.001));
-    try expect(approxEqAbs(m[1], f32x4(0.0, 1.0, 0.0, 0.0), 0.001));
-    try expect(approxEqAbs(m[2], f32x4(0.0, 0.0, 1.0, 0.0), 0.001));
-    try expect(approxEqAbs(m[3], f32x4(0.0, 0.0, 3.0, 1.0), 0.001));
-}
-
-pub fn perspectiveFovLh(fovy: f32, aspect: f32, near: f32, far: f32) Mat {
-    const scfov = sincos(0.5 * fovy);
-
-    assert(near > 0.0 and far > 0.0 and far > near);
-    assert(!math.approxEqAbs(f32, scfov[0], 0.0, 0.001));
-    assert(!math.approxEqAbs(f32, far, near, 0.001));
-    assert(!math.approxEqAbs(f32, aspect, 0.0, 0.01));
-
-    const h = scfov[1] / scfov[0];
-    const w = h / aspect;
-    const r = far / (far - near);
-    return .{
-        f32x4(w, 0.0, 0.0, 0.0),
-        f32x4(0.0, h, 0.0, 0.0),
-        f32x4(0.0, 0.0, r, 1.0),
-        f32x4(0.0, 0.0, -r * near, 0.0),
-    };
-}
-pub fn perspectiveFovRh(fovy: f32, aspect: f32, near: f32, far: f32) Mat {
-    const scfov = sincos(0.5 * fovy);
-
-    assert(near > 0.0 and far > 0.0 and far > near);
-    assert(!math.approxEqAbs(f32, scfov[0], 0.0, 0.001));
-    assert(!math.approxEqAbs(f32, far, near, 0.001));
-    assert(!math.approxEqAbs(f32, aspect, 0.0, 0.01));
-
-    const h = scfov[1] / scfov[0];
-    const w = h / aspect;
-    const r = far / (near - far);
-    return .{
-        f32x4(w, 0.0, 0.0, 0.0),
-        f32x4(0.0, h, 0.0, 0.0),
-        f32x4(0.0, 0.0, r, -1.0),
-        f32x4(0.0, 0.0, r * near, 0.0),
-    };
-}
-
-// Produces Z values in [-1.0, 1.0] range (OpenGL defaults)
-pub fn perspectiveFovLhGl(fovy: f32, aspect: f32, near: f32, far: f32) Mat {
-    const scfov = sincos(0.5 * fovy);
-
-    assert(near > 0.0 and far > 0.0 and far > near);
-    assert(!math.approxEqAbs(f32, scfov[0], 0.0, 0.001));
-    assert(!math.approxEqAbs(f32, far, near, 0.001));
-    assert(!math.approxEqAbs(f32, aspect, 0.0, 0.01));
-
-    const h = scfov[1] / scfov[0];
-    const w = h / aspect;
-    const r = far - near;
-    return .{
-        f32x4(w, 0.0, 0.0, 0.0),
-        f32x4(0.0, h, 0.0, 0.0),
-        f32x4(0.0, 0.0, (near + far) / r, 1.0),
-        f32x4(0.0, 0.0, 2.0 * near * far / -r, 0.0),
-    };
-}
-
-// Produces Z values in [-1.0, 1.0] range (OpenGL defaults)
-pub fn perspectiveFovRhGl(fovy: f32, aspect: f32, near: f32, far: f32) Mat {
-    const scfov = sincos(0.5 * fovy);
-
-    assert(near > 0.0 and far > 0.0 and far > near);
-    assert(!math.approxEqAbs(f32, scfov[0], 0.0, 0.001));
-    assert(!math.approxEqAbs(f32, far, near, 0.001));
-    assert(!math.approxEqAbs(f32, aspect, 0.0, 0.01));
-
-    const h = scfov[1] / scfov[0];
-    const w = h / aspect;
-    const r = near - far;
-    return .{
-        f32x4(w, 0.0, 0.0, 0.0),
-        f32x4(0.0, h, 0.0, 0.0),
-        f32x4(0.0, 0.0, (near + far) / r, -1.0),
-        f32x4(0.0, 0.0, 2.0 * near * far / r, 0.0),
-    };
-}
-
-pub fn orthographicLh(w: f32, h: f32, near: f32, far: f32) Mat {
-    assert(!math.approxEqAbs(f32, w, 0.0, 0.001));
-    assert(!math.approxEqAbs(f32, h, 0.0, 0.001));
-    assert(!math.approxEqAbs(f32, far, near, 0.001));
-
-    const r = 1 / (far - near);
-    return .{
-        f32x4(2 / w, 0.0, 0.0, 0.0),
-        f32x4(0.0, 2 / h, 0.0, 0.0),
-        f32x4(0.0, 0.0, r, 0.0),
-        f32x4(0.0, 0.0, -r * near, 1.0),
-    };
-}
-
-pub fn orthographicRh(w: f32, h: f32, near: f32, far: f32) Mat {
-    assert(!math.approxEqAbs(f32, w, 0.0, 0.001));
-    assert(!math.approxEqAbs(f32, h, 0.0, 0.001));
-    assert(!math.approxEqAbs(f32, far, near, 0.001));
-
-    const r = 1 / (near - far);
-    return .{
-        f32x4(2 / w, 0.0, 0.0, 0.0),
-        f32x4(0.0, 2 / h, 0.0, 0.0),
-        f32x4(0.0, 0.0, r, 0.0),
-        f32x4(0.0, 0.0, r * near, 1.0),
-    };
-}
-
-// Produces Z values in [-1.0, 1.0] range (OpenGL defaults)
-pub fn orthographicLhGl(w: f32, h: f32, near: f32, far: f32) Mat {
-    assert(!math.approxEqAbs(f32, w, 0.0, 0.001));
-    assert(!math.approxEqAbs(f32, h, 0.0, 0.001));
-    assert(!math.approxEqAbs(f32, far, near, 0.001));
-
-    const r = far - near;
-    return .{
-        f32x4(2 / w, 0.0, 0.0, 0.0),
-        f32x4(0.0, 2 / h, 0.0, 0.0),
-        f32x4(0.0, 0.0, 2 / r, 0.0),
-        f32x4(0.0, 0.0, (near + far) / -r, 1.0),
-    };
-}
-
-// Produces Z values in [-1.0, 1.0] range (OpenGL defaults)
-pub fn orthographicRhGl(w: f32, h: f32, near: f32, far: f32) Mat {
-    assert(!math.approxEqAbs(f32, w, 0.0, 0.001));
-    assert(!math.approxEqAbs(f32, h, 0.0, 0.001));
-    assert(!math.approxEqAbs(f32, far, near, 0.001));
-
-    const r = near - far;
-    return .{
-        f32x4(2 / w, 0.0, 0.0, 0.0),
-        f32x4(0.0, 2 / h, 0.0, 0.0),
-        f32x4(0.0, 0.0, 2 / r, 0.0),
-        f32x4(0.0, 0.0, (near + far) / r, 1.0),
-    };
-}
-
-pub fn orthographicOffCenterLh(left: f32, right: f32, top: f32, bottom: f32, near: f32, far: f32) Mat {
-    assert(!math.approxEqAbs(f32, far, near, 0.001));
-
-    const r = 1 / (far - near);
-    return .{
-        f32x4(2 / (right - left), 0.0, 0.0, 0.0),
-        f32x4(0.0, 2 / (top - bottom), 0.0, 0.0),
-        f32x4(0.0, 0.0, r, 0.0),
-        f32x4(-(right + left) / (right - left), -(top + bottom) / (top - bottom), -r * near, 1.0),
-    };
-}
-
-pub fn orthographicOffCenterRh(left: f32, right: f32, top: f32, bottom: f32, near: f32, far: f32) Mat {
-    assert(!math.approxEqAbs(f32, far, near, 0.001));
-
-    const r = 1 / (near - far);
-    return .{
-        f32x4(2 / (right - left), 0.0, 0.0, 0.0),
-        f32x4(0.0, 2 / (top - bottom), 0.0, 0.0),
-        f32x4(0.0, 0.0, r, 0.0),
-        f32x4(-(right + left) / (right - left), -(top + bottom) / (top - bottom), r * near, 1.0),
-    };
-}
-
-// Produces Z values in [-1.0, 1.0] range (OpenGL defaults)
-pub fn orthographicOffCenterLhGl(left: f32, right: f32, top: f32, bottom: f32, near: f32, far: f32) Mat {
-    assert(!math.approxEqAbs(f32, far, near, 0.001));
-
-    const r = far - near;
-    return .{
-        f32x4(2 / (right - left), 0.0, 0.0, 0.0),
-        f32x4(0.0, 2 / (top - bottom), 0.0, 0.0),
-        f32x4(0.0, 0.0, 2 / r, 0.0),
-        f32x4(-(right + left) / (right - left), -(top + bottom) / (top - bottom), (near + far) / -r, 1.0),
-    };
-}
-
-// Produces Z values in [-1.0, 1.0] range (OpenGL defaults)
-pub fn orthographicOffCenterRhGl(left: f32, right: f32, top: f32, bottom: f32, near: f32, far: f32) Mat {
-    assert(!math.approxEqAbs(f32, far, near, 0.001));
-
-    const r = near - far;
-    return .{
-        f32x4(2 / (right - left), 0.0, 0.0, 0.0),
-        f32x4(0.0, 2 / (top - bottom), 0.0, 0.0),
-        f32x4(0.0, 0.0, 2 / r, 0.0),
-        f32x4(-(right + left) / (right - left), -(top + bottom) / (top - bottom), (near + far) / r, 1.0),
-    };
-}
-
-pub fn determinant(m: Mat) F32x4 {
-    var v0 = swizzle(m[2], .y, .x, .x, .x);
-    var v1 = swizzle(m[3], .z, .z, .y, .y);
-    var v2 = swizzle(m[2], .y, .x, .x, .x);
-    var v3 = swizzle(m[3], .w, .w, .w, .z);
-    var v4 = swizzle(m[2], .z, .z, .y, .y);
-    var v5 = swizzle(m[3], .w, .w, .w, .z);
-
-    var p0 = v0 * v1;
-    var p1 = v2 * v3;
-    var p2 = v4 * v5;
-
-    v0 = swizzle(m[2], .z, .z, .y, .y);
-    v1 = swizzle(m[3], .y, .x, .x, .x);
-    v2 = swizzle(m[2], .w, .w, .w, .z);
-    v3 = swizzle(m[3], .y, .x, .x, .x);
-    v4 = swizzle(m[2], .w, .w, .w, .z);
-    v5 = swizzle(m[3], .z, .z, .y, .y);
-
-    p0 = mulAdd(-v0, v1, p0);
-    p1 = mulAdd(-v2, v3, p1);
-    p2 = mulAdd(-v4, v5, p2);
-
-    v0 = swizzle(m[1], .w, .w, .w, .z);
-    v1 = swizzle(m[1], .z, .z, .y, .y);
-    v2 = swizzle(m[1], .y, .x, .x, .x);
-
-    var s = m[0] * f32x4(1.0, -1.0, 1.0, -1.0);
-    var r = v0 * p0;
-    r = mulAdd(-v1, p1, r);
-    r = mulAdd(v2, p2, r);
-    return dot4(s, r);
-}
-test "zmath.matrix.determinant" {
-    const m = Mat{
-        f32x4(10.0, -9.0, -12.0, 1.0),
-        f32x4(7.0, -12.0, 11.0, 1.0),
-        f32x4(-10.0, 10.0, 3.0, 1.0),
-        f32x4(1.0, 2.0, 3.0, 4.0),
-    };
-    try expect(approxEqAbs(determinant(m), splat(F32x4, 2939.0), 0.0001));
-}
-
-pub fn inverse(a: anytype) @TypeOf(a) {
-    const T = @TypeOf(a);
-    return switch (T) {
-        Mat => inverseMat(a),
-        Quat => inverseQuat(a),
-        else => @compileError("zmath.inverse() not implemented for " ++ @typeName(T)),
-    };
-}
-
-fn inverseMat(m: Mat) Mat {
-    return inverseDet(m, null);
-}
-
-pub fn inverseDet(m: Mat, out_det: ?*F32x4) Mat {
-    const mt = transpose(m);
-    var v0: [4]F32x4 = undefined;
-    var v1: [4]F32x4 = undefined;
-
-    v0[0] = swizzle(mt[2], .x, .x, .y, .y);
-    v1[0] = swizzle(mt[3], .z, .w, .z, .w);
-    v0[1] = swizzle(mt[0], .x, .x, .y, .y);
-    v1[1] = swizzle(mt[1], .z, .w, .z, .w);
-    v0[2] = @shuffle(f32, mt[2], mt[0], [4]i32{ 0, 2, ~@as(i32, 0), ~@as(i32, 2) });
-    v1[2] = @shuffle(f32, mt[3], mt[1], [4]i32{ 1, 3, ~@as(i32, 1), ~@as(i32, 3) });
-
-    var d0 = v0[0] * v1[0];
-    var d1 = v0[1] * v1[1];
-    var d2 = v0[2] * v1[2];
-
-    v0[0] = swizzle(mt[2], .z, .w, .z, .w);
-    v1[0] = swizzle(mt[3], .x, .x, .y, .y);
-    v0[1] = swizzle(mt[0], .z, .w, .z, .w);
-    v1[1] = swizzle(mt[1], .x, .x, .y, .y);
-    v0[2] = @shuffle(f32, mt[2], mt[0], [4]i32{ 1, 3, ~@as(i32, 1), ~@as(i32, 3) });
-    v1[2] = @shuffle(f32, mt[3], mt[1], [4]i32{ 0, 2, ~@as(i32, 0), ~@as(i32, 2) });
-
-    d0 = mulAdd(-v0[0], v1[0], d0);
-    d1 = mulAdd(-v0[1], v1[1], d1);
-    d2 = mulAdd(-v0[2], v1[2], d2);
-
-    v0[0] = swizzle(mt[1], .y, .z, .x, .y);
-    v1[0] = @shuffle(f32, d0, d2, [4]i32{ ~@as(i32, 1), 1, 3, 0 });
-    v0[1] = swizzle(mt[0], .z, .x, .y, .x);
-    v1[1] = @shuffle(f32, d0, d2, [4]i32{ 3, ~@as(i32, 1), 1, 2 });
-    v0[2] = swizzle(mt[3], .y, .z, .x, .y);
-    v1[2] = @shuffle(f32, d1, d2, [4]i32{ ~@as(i32, 3), 1, 3, 0 });
-    v0[3] = swizzle(mt[2], .z, .x, .y, .x);
-    v1[3] = @shuffle(f32, d1, d2, [4]i32{ 3, ~@as(i32, 3), 1, 2 });
-
-    var c0 = v0[0] * v1[0];
-    var c2 = v0[1] * v1[1];
-    var c4 = v0[2] * v1[2];
-    var c6 = v0[3] * v1[3];
-
-    v0[0] = swizzle(mt[1], .z, .w, .y, .z);
-    v1[0] = @shuffle(f32, d0, d2, [4]i32{ 3, 0, 1, ~@as(i32, 0) });
-    v0[1] = swizzle(mt[0], .w, .z, .w, .y);
-    v1[1] = @shuffle(f32, d0, d2, [4]i32{ 2, 1, ~@as(i32, 0), 0 });
-    v0[2] = swizzle(mt[3], .z, .w, .y, .z);
-    v1[2] = @shuffle(f32, d1, d2, [4]i32{ 3, 0, 1, ~@as(i32, 2) });
-    v0[3] = swizzle(mt[2], .w, .z, .w, .y);
-    v1[3] = @shuffle(f32, d1, d2, [4]i32{ 2, 1, ~@as(i32, 2), 0 });
-
-    c0 = mulAdd(-v0[0], v1[0], c0);
-    c2 = mulAdd(-v0[1], v1[1], c2);
-    c4 = mulAdd(-v0[2], v1[2], c4);
-    c6 = mulAdd(-v0[3], v1[3], c6);
-
-    v0[0] = swizzle(mt[1], .w, .x, .w, .x);
-    v1[0] = @shuffle(f32, d0, d2, [4]i32{ 2, ~@as(i32, 1), ~@as(i32, 0), 2 });
-    v0[1] = swizzle(mt[0], .y, .w, .x, .z);
-    v1[1] = @shuffle(f32, d0, d2, [4]i32{ ~@as(i32, 1), 0, 3, ~@as(i32, 0) });
-    v0[2] = swizzle(mt[3], .w, .x, .w, .x);
-    v1[2] = @shuffle(f32, d1, d2, [4]i32{ 2, ~@as(i32, 3), ~@as(i32, 2), 2 });
-    v0[3] = swizzle(mt[2], .y, .w, .x, .z);
-    v1[3] = @shuffle(f32, d1, d2, [4]i32{ ~@as(i32, 3), 0, 3, ~@as(i32, 2) });
-
-    const c1 = mulAdd(-v0[0], v1[0], c0);
-    const c3 = mulAdd(v0[1], v1[1], c2);
-    const c5 = mulAdd(-v0[2], v1[2], c4);
-    const c7 = mulAdd(v0[3], v1[3], c6);
-
-    c0 = mulAdd(v0[0], v1[0], c0);
-    c2 = mulAdd(-v0[1], v1[1], c2);
-    c4 = mulAdd(v0[2], v1[2], c4);
-    c6 = mulAdd(-v0[3], v1[3], c6);
-
-    var mr = Mat{
-        f32x4(c0[0], c1[1], c0[2], c1[3]),
-        f32x4(c2[0], c3[1], c2[2], c3[3]),
-        f32x4(c4[0], c5[1], c4[2], c5[3]),
-        f32x4(c6[0], c7[1], c6[2], c7[3]),
-    };
-
-    const det = dot4(mr[0], mt[0]);
-    if (out_det != null) {
-        out_det.?.* = det;
-    }
-
-    if (math.approxEqAbs(f32, det[0], 0.0, math.f32_epsilon)) {
-        return .{
-            f32x4(0.0, 0.0, 0.0, 0.0),
-            f32x4(0.0, 0.0, 0.0, 0.0),
-            f32x4(0.0, 0.0, 0.0, 0.0),
-            f32x4(0.0, 0.0, 0.0, 0.0),
-        };
-    }
-
-    const scale = splat(F32x4, 1.0) / det;
-    mr[0] *= scale;
-    mr[1] *= scale;
-    mr[2] *= scale;
-    mr[3] *= scale;
-    return mr;
-}
-test "zmath.matrix.inverse" {
-    const m = Mat{
-        f32x4(10.0, -9.0, -12.0, 1.0),
-        f32x4(7.0, -12.0, 11.0, 1.0),
-        f32x4(-10.0, 10.0, 3.0, 1.0),
-        f32x4(1.0, 2.0, 3.0, 4.0),
-    };
-    var det: F32x4 = undefined;
-    const mi = inverseDet(m, &det);
-    try expect(approxEqAbs(det, splat(F32x4, 2939.0), 0.0001));
-
-    try expect(approxEqAbs(mi[0], f32x4(-0.170806, -0.13576, -0.349439, 0.164001), 0.0001));
-    try expect(approxEqAbs(mi[1], f32x4(-0.163661, -0.14801, -0.253147, 0.141204), 0.0001));
-    try expect(approxEqAbs(mi[2], f32x4(-0.0871045, 0.00646478, -0.0785982, 0.0398095), 0.0001));
-    try expect(approxEqAbs(mi[3], f32x4(0.18986, 0.103096, 0.272882, 0.10854), 0.0001));
-}
-
-pub fn matFromNormAxisAngle(axis: Vec, angle: f32) Mat {
-    const sincos_angle = sincos(angle);
-
-    const c2 = splat(F32x4, 1.0 - sincos_angle[1]);
-    const c1 = splat(F32x4, sincos_angle[1]);
-    const c0 = splat(F32x4, sincos_angle[0]);
-
-    const n0 = swizzle(axis, .y, .z, .x, .w);
-    const n1 = swizzle(axis, .z, .x, .y, .w);
-
-    var v0 = c2 * n0 * n1;
-    const r0 = c2 * axis * axis + c1;
-    const r1 = c0 * axis + v0;
-    var r2 = v0 - c0 * axis;
-
-    v0 = andInt(r0, f32x4_mask3);
-
-    var v1 = @shuffle(f32, r1, r2, [4]i32{ 0, 2, ~@as(i32, 1), ~@as(i32, 2) });
-    v1 = swizzle(v1, .y, .z, .w, .x);
-
-    var v2 = @shuffle(f32, r1, r2, [4]i32{ 1, 1, ~@as(i32, 0), ~@as(i32, 0) });
-    v2 = swizzle(v2, .x, .z, .x, .z);
-
-    r2 = @shuffle(f32, v0, v1, [4]i32{ 0, 3, ~@as(i32, 0), ~@as(i32, 1) });
-    r2 = swizzle(r2, .x, .z, .w, .y);
-
-    var m: Mat = undefined;
-    m[0] = r2;
-
-    r2 = @shuffle(f32, v0, v1, [4]i32{ 1, 3, ~@as(i32, 2), ~@as(i32, 3) });
-    r2 = swizzle(r2, .z, .x, .w, .y);
-    m[1] = r2;
-
-    v2 = @shuffle(f32, v2, v0, [4]i32{ 0, 1, ~@as(i32, 2), ~@as(i32, 3) });
-    m[2] = v2;
-    m[3] = f32x4(0.0, 0.0, 0.0, 1.0);
-    return m;
-}
-pub fn matFromAxisAngle(axis: Vec, angle: f32) Mat {
-    assert(!all(axis == splat(F32x4, 0.0), 3));
-    assert(!all(isInf(axis), 3));
-    const normal = normalize3(axis);
-    return matFromNormAxisAngle(normal, angle);
-}
-test "zmath.matrix.matFromAxisAngle" {
-    {
-        const m0 = matFromAxisAngle(f32x4(1.0, 0.0, 0.0, 0.0), math.pi * 0.25);
-        const m1 = rotationX(math.pi * 0.25);
-        try expect(approxEqAbs(m0[0], m1[0], 0.001));
-        try expect(approxEqAbs(m0[1], m1[1], 0.001));
-        try expect(approxEqAbs(m0[2], m1[2], 0.001));
-        try expect(approxEqAbs(m0[3], m1[3], 0.001));
-    }
-    {
-        const m0 = matFromAxisAngle(f32x4(0.0, 1.0, 0.0, 0.0), math.pi * 0.125);
-        const m1 = rotationY(math.pi * 0.125);
-        try expect(approxEqAbs(m0[0], m1[0], 0.001));
-        try expect(approxEqAbs(m0[1], m1[1], 0.001));
-        try expect(approxEqAbs(m0[2], m1[2], 0.001));
-        try expect(approxEqAbs(m0[3], m1[3], 0.001));
-    }
-    {
-        const m0 = matFromAxisAngle(f32x4(0.0, 0.0, 1.0, 0.0), math.pi * 0.333);
-        const m1 = rotationZ(math.pi * 0.333);
-        try expect(approxEqAbs(m0[0], m1[0], 0.001));
-        try expect(approxEqAbs(m0[1], m1[1], 0.001));
-        try expect(approxEqAbs(m0[2], m1[2], 0.001));
-        try expect(approxEqAbs(m0[3], m1[3], 0.001));
-    }
-}
-
-pub fn matFromQuat(quat: Quat) Mat {
-    var q0 = quat + quat;
-    var q1 = quat * q0;
-
-    var v0 = swizzle(q1, .y, .x, .x, .w);
-    v0 = andInt(v0, f32x4_mask3);
-
-    var v1 = swizzle(q1, .z, .z, .y, .w);
-    v1 = andInt(v1, f32x4_mask3);
-
-    var r0 = (f32x4(1.0, 1.0, 1.0, 0.0) - v0) - v1;
-
-    v0 = swizzle(quat, .x, .x, .y, .w);
-    v1 = swizzle(q0, .z, .y, .z, .w);
-    v0 = v0 * v1;
-
-    v1 = swizzle(quat, .w, .w, .w, .w);
-    var v2 = swizzle(q0, .y, .z, .x, .w);
-    v1 = v1 * v2;
-
-    var r1 = v0 + v1;
-    var r2 = v0 - v1;
-
-    v0 = @shuffle(f32, r1, r2, [4]i32{ 1, 2, ~@as(i32, 0), ~@as(i32, 1) });
-    v0 = swizzle(v0, .x, .z, .w, .y);
-    v1 = @shuffle(f32, r1, r2, [4]i32{ 0, 0, ~@as(i32, 2), ~@as(i32, 2) });
-    v1 = swizzle(v1, .x, .z, .x, .z);
-
-    q1 = @shuffle(f32, r0, v0, [4]i32{ 0, 3, ~@as(i32, 0), ~@as(i32, 1) });
-    q1 = swizzle(q1, .x, .z, .w, .y);
-
-    var m: Mat = undefined;
-    m[0] = q1;
-
-    q1 = @shuffle(f32, r0, v0, [4]i32{ 1, 3, ~@as(i32, 2), ~@as(i32, 3) });
-    q1 = swizzle(q1, .z, .x, .w, .y);
-    m[1] = q1;
-
-    q1 = @shuffle(f32, v1, r0, [4]i32{ 0, 1, ~@as(i32, 2), ~@as(i32, 3) });
-    m[2] = q1;
-    m[3] = f32x4(0.0, 0.0, 0.0, 1.0);
-    return m;
-}
-test "zmath.matrix.matFromQuat" {
-    {
-        const m = matFromQuat(f32x4(0.0, 0.0, 0.0, 1.0));
-        try expect(approxEqAbs(m[0], f32x4(1.0, 0.0, 0.0, 0.0), 0.0001));
-        try expect(approxEqAbs(m[1], f32x4(0.0, 1.0, 0.0, 0.0), 0.0001));
-        try expect(approxEqAbs(m[2], f32x4(0.0, 0.0, 1.0, 0.0), 0.0001));
-        try expect(approxEqAbs(m[3], f32x4(0.0, 0.0, 0.0, 1.0), 0.0001));
-    }
-}
-
-pub fn matFromRollPitchYaw(pitch: f32, yaw: f32, roll: f32) Mat {
-    return matFromRollPitchYawV(f32x4(pitch, yaw, roll, 0.0));
-}
-pub fn matFromRollPitchYawV(angles: Vec) Mat {
-    return matFromQuat(quatFromRollPitchYawV(angles));
-}
-
-pub fn matToQuat(m: Mat) Quat {
-    return quatFromMat(m);
-}
-
-pub inline fn loadMat(mem: []const f32) Mat {
-    return .{
-        load(mem[0..4], F32x4, 0),
-        load(mem[4..8], F32x4, 0),
-        load(mem[8..12], F32x4, 0),
-        load(mem[12..16], F32x4, 0),
-    };
-}
-test "zmath.loadMat" {
-    const a = [18]f32{
-        1.0,  2.0,  3.0,  4.0,
-        5.0,  6.0,  7.0,  8.0,
-        9.0,  10.0, 11.0, 12.0,
-        13.0, 14.0, 15.0, 16.0,
-        17.0, 18.0,
-    };
-    const m = loadMat(a[1..]);
-    try expect(approxEqAbs(m[0], f32x4(2.0, 3.0, 4.0, 5.0), 0.0));
-    try expect(approxEqAbs(m[1], f32x4(6.0, 7.0, 8.0, 9.0), 0.0));
-    try expect(approxEqAbs(m[2], f32x4(10.0, 11.0, 12.0, 13.0), 0.0));
-    try expect(approxEqAbs(m[3], f32x4(14.0, 15.0, 16.0, 17.0), 0.0));
-}
-
-pub inline fn storeMat(mem: []f32, m: Mat) void {
-    store(mem[0..4], m[0], 0);
-    store(mem[4..8], m[1], 0);
-    store(mem[8..12], m[2], 0);
-    store(mem[12..16], m[3], 0);
-}
-
-pub inline fn loadMat43(mem: []const f32) Mat {
-    return .{
-        f32x4(mem[0], mem[1], mem[2], 0.0),
-        f32x4(mem[3], mem[4], mem[5], 0.0),
-        f32x4(mem[6], mem[7], mem[8], 0.0),
-        f32x4(mem[9], mem[10], mem[11], 1.0),
-    };
-}
-
-pub inline fn storeMat43(mem: []f32, m: Mat) void {
-    store(mem[0..3], m[0], 3);
-    store(mem[3..6], m[1], 3);
-    store(mem[6..9], m[2], 3);
-    store(mem[9..12], m[3], 3);
-}
-
-pub inline fn loadMat34(mem: []const f32) Mat {
-    return .{
-        load(mem[0..4], F32x4, 0),
-        load(mem[4..8], F32x4, 0),
-        load(mem[8..12], F32x4, 0),
-        f32x4(0.0, 0.0, 0.0, 1.0),
-    };
-}
-
-pub inline fn storeMat34(mem: []f32, m: Mat) void {
-    store(mem[0..4], m[0], 0);
-    store(mem[4..8], m[1], 0);
-    store(mem[8..12], m[2], 0);
-}
-
-pub inline fn matToArr(m: Mat) [16]f32 {
-    var array: [16]f32 = undefined;
-    storeMat(array[0..], m);
-    return array;
-}
-
-pub inline fn matToArr43(m: Mat) [12]f32 {
-    var array: [12]f32 = undefined;
-    storeMat43(array[0..], m);
-    return array;
-}
-
-pub inline fn matToArr34(m: Mat) [12]f32 {
-    var array: [12]f32 = undefined;
-    storeMat34(array[0..], m);
-    return array;
-}
-// ------------------------------------------------------------------------------
-//
-// 5. Quaternion functions
-//
-// ------------------------------------------------------------------------------
-pub fn qmul(q0: Quat, q1: Quat) Quat {
-    var result = swizzle(q1, .w, .w, .w, .w);
-    var q1x = swizzle(q1, .x, .x, .x, .x);
-    var q1y = swizzle(q1, .y, .y, .y, .y);
-    var q1z = swizzle(q1, .z, .z, .z, .z);
-    result = result * q0;
-    var q0_shuf = swizzle(q0, .w, .z, .y, .x);
-    q1x = q1x * q0_shuf;
-    q0_shuf = swizzle(q0_shuf, .y, .x, .w, .z);
-    result = mulAdd(q1x, f32x4(1.0, -1.0, 1.0, -1.0), result);
-    q1y = q1y * q0_shuf;
-    q0_shuf = swizzle(q0_shuf, .w, .z, .y, .x);
-    q1y = q1y * f32x4(1.0, 1.0, -1.0, -1.0);
-    q1z = q1z * q0_shuf;
-    q1y = mulAdd(q1z, f32x4(-1.0, 1.0, 1.0, -1.0), q1y);
-    return result + q1y;
-}
-test "zmath.quaternion.mul" {
-    {
-        const q0 = f32x4(2.0, 3.0, 4.0, 1.0);
-        const q1 = f32x4(3.0, 2.0, 1.0, 4.0);
-        try expect(approxEqAbs(qmul(q0, q1), f32x4(16.0, 4.0, 22.0, -12.0), 0.0001));
-    }
-}
-
-pub fn quatToMat(quat: Quat) Mat {
-    return matFromQuat(quat);
-}
-
-pub fn quatToAxisAngle(quat: Quat, axis: *Vec, angle: *f32) void {
-    axis.* = quat;
-    angle.* = 2.0 * acos(quat[3]);
-}
-test "zmath.quaternion.quatToAxisAngle" {
-    {
-        const q0 = quatFromNormAxisAngle(f32x4(1.0, 0.0, 0.0, 0.0), 0.25 * math.pi);
-        var axis: Vec = f32x4(4.0, 3.0, 2.0, 1.0);
-        var angle: f32 = 10.0;
-        quatToAxisAngle(q0, &axis, &angle);
-        try expect(math.approxEqAbs(f32, axis[0], @sin(@as(f32, 0.25) * math.pi * 0.5), 0.0001));
-        try expect(axis[1] == 0.0);
-        try expect(axis[2] == 0.0);
-        try expect(math.approxEqAbs(f32, angle, 0.25 * math.pi, 0.0001));
-    }
-}
-
-pub fn quatFromMat(m: Mat) Quat {
-    const r0 = m[0];
-    const r1 = m[1];
-    const r2 = m[2];
-    const r00 = swizzle(r0, .x, .x, .x, .x);
-    const r11 = swizzle(r1, .y, .y, .y, .y);
-    const r22 = swizzle(r2, .z, .z, .z, .z);
-
-    const x2gey2 = (r11 - r00) <= splat(F32x4, 0.0);
-    const z2gew2 = (r11 + r00) <= splat(F32x4, 0.0);
-    const x2py2gez2pw2 = r22 <= splat(F32x4, 0.0);
-
-    var t0 = mulAdd(r00, f32x4(1.0, -1.0, -1.0, 1.0), splat(F32x4, 1.0));
-    var t1 = r11 * f32x4(-1.0, 1.0, -1.0, 1.0);
-    var t2 = mulAdd(r22, f32x4(-1.0, -1.0, 1.0, 1.0), t0);
-    const x2y2z2w2 = t1 + t2;
-
-    t0 = @shuffle(f32, r0, r1, [4]i32{ 1, 2, ~@as(i32, 2), ~@as(i32, 1) });
-    t1 = @shuffle(f32, r1, r2, [4]i32{ 0, 0, ~@as(i32, 0), ~@as(i32, 1) });
-    t1 = swizzle(t1, .x, .z, .w, .y);
-    const xyxzyz = t0 + t1;
-
-    t0 = @shuffle(f32, r2, r1, [4]i32{ 1, 0, ~@as(i32, 0), ~@as(i32, 0) });
-    t1 = @shuffle(f32, r1, r0, [4]i32{ 2, 2, ~@as(i32, 2), ~@as(i32, 1) });
-    t1 = swizzle(t1, .x, .z, .w, .y);
-    const xwywzw = (t0 - t1) * f32x4(-1.0, 1.0, -1.0, 1.0);
-
-    t0 = @shuffle(f32, x2y2z2w2, xyxzyz, [4]i32{ 0, 1, ~@as(i32, 0), ~@as(i32, 0) });
-    t1 = @shuffle(f32, x2y2z2w2, xwywzw, [4]i32{ 2, 3, ~@as(i32, 2), ~@as(i32, 0) });
-    t2 = @shuffle(f32, xyxzyz, xwywzw, [4]i32{ 1, 2, ~@as(i32, 0), ~@as(i32, 1) });
-
-    const tensor0 = @shuffle(f32, t0, t2, [4]i32{ 0, 2, ~@as(i32, 0), ~@as(i32, 2) });
-    const tensor1 = @shuffle(f32, t0, t2, [4]i32{ 2, 1, ~@as(i32, 1), ~@as(i32, 3) });
-    const tensor2 = @shuffle(f32, t2, t1, [4]i32{ 0, 1, ~@as(i32, 0), ~@as(i32, 2) });
-    const tensor3 = @shuffle(f32, t2, t1, [4]i32{ 2, 3, ~@as(i32, 2), ~@as(i32, 1) });
-
-    t0 = select(x2gey2, tensor0, tensor1);
-    t1 = select(z2gew2, tensor2, tensor3);
-    t2 = select(x2py2gez2pw2, t0, t1);
-
-    return t2 / length4(t2);
-}
-test "zmath.quatFromMat" {
-    {
-        const q0 = quatFromAxisAngle(f32x4(1.0, 0.0, 0.0, 0.0), 0.25 * math.pi);
-        const q1 = quatFromMat(rotationX(0.25 * math.pi));
-        try expect(approxEqAbs(q0, q1, 0.0001));
-    }
-    {
-        const q0 = quatFromAxisAngle(f32x4(1.0, 2.0, 0.5, 0.0), 0.25 * math.pi);
-        const q1 = quatFromMat(matFromAxisAngle(f32x4(1.0, 2.0, 0.5, 0.0), 0.25 * math.pi));
-        try expect(approxEqAbs(q0, q1, 0.0001));
-    }
-    {
-        const q0 = quatFromRollPitchYaw(0.1 * math.pi, -0.2 * math.pi, 0.3 * math.pi);
-        const q1 = quatFromMat(matFromRollPitchYaw(0.1 * math.pi, -0.2 * math.pi, 0.3 * math.pi));
-        try expect(approxEqAbs(q0, q1, 0.0001));
-    }
-}
-
-pub fn quatFromNormAxisAngle(axis: Vec, angle: f32) Quat {
-    var n = f32x4(axis[0], axis[1], axis[2], 1.0);
-    const sc = sincos(0.5 * angle);
-    return n * f32x4(sc[0], sc[0], sc[0], sc[1]);
-}
-pub fn quatFromAxisAngle(axis: Vec, angle: f32) Quat {
-    assert(!all(axis == splat(F32x4, 0.0), 3));
-    assert(!all(isInf(axis), 3));
-    const normal = normalize3(axis);
-    return quatFromNormAxisAngle(normal, angle);
-}
-test "zmath.quaternion.quatFromNormAxisAngle" {
-    {
-        const q0 = quatFromAxisAngle(f32x4(1.0, 0.0, 0.0, 0.0), 0.25 * math.pi);
-        const q1 = quatFromAxisAngle(f32x4(0.0, 1.0, 0.0, 0.0), 0.125 * math.pi);
-        const m0 = rotationX(0.25 * math.pi);
-        const m1 = rotationY(0.125 * math.pi);
-        const mr0 = quatToMat(qmul(q0, q1));
-        const mr1 = mul(m0, m1);
-        try expect(approxEqAbs(mr0[0], mr1[0], 0.0001));
-        try expect(approxEqAbs(mr0[1], mr1[1], 0.0001));
-        try expect(approxEqAbs(mr0[2], mr1[2], 0.0001));
-        try expect(approxEqAbs(mr0[3], mr1[3], 0.0001));
-    }
-    {
-        const m0 = quatToMat(quatFromAxisAngle(f32x4(1.0, 2.0, 0.5, 0.0), 0.25 * math.pi));
-        const m1 = matFromAxisAngle(f32x4(1.0, 2.0, 0.5, 0.0), 0.25 * math.pi);
-        try expect(approxEqAbs(m0[0], m1[0], 0.0001));
-        try expect(approxEqAbs(m0[1], m1[1], 0.0001));
-        try expect(approxEqAbs(m0[2], m1[2], 0.0001));
-        try expect(approxEqAbs(m0[3], m1[3], 0.0001));
-    }
-}
-
-pub inline fn qidentity() Quat {
-    return f32x4(@as(f32, 0.0), @as(f32, 0.0), @as(f32, 0.0), @as(f32, 1.0));
-}
-
-pub inline fn conjugate(quat: Quat) Quat {
-    return quat * f32x4(-1.0, -1.0, -1.0, 1.0);
-}
-
-fn inverseQuat(quat: Quat) Quat {
-    const l = lengthSq4(quat);
-    const conj = conjugate(quat);
-    return select(l <= splat(F32x4, math.f32_epsilon), splat(F32x4, 0.0), conj / l);
-}
-test "zmath.quaternion.inverseQuat" {
-    try expect(approxEqAbs(
-        inverse(f32x4(2.0, 3.0, 4.0, 1.0)),
-        f32x4(-1.0 / 15.0, -1.0 / 10.0, -2.0 / 15.0, 1.0 / 30.0),
-        0.0001,
-    ));
-    try expect(approxEqAbs(inverse(qidentity()), qidentity(), 0.0001));
-}
-
-pub fn slerp(q0: Quat, q1: Quat, t: f32) Quat {
-    return slerpV(q0, q1, splat(F32x4, t));
-}
-pub fn slerpV(q0: Quat, q1: Quat, t: F32x4) Quat {
-    var cos_omega = dot4(q0, q1);
-    const sign = select(cos_omega < splat(F32x4, 0.0), splat(F32x4, -1.0), splat(F32x4, 1.0));
-
-    cos_omega = cos_omega * sign;
-    const sin_omega = sqrt(splat(F32x4, 1.0) - cos_omega * cos_omega);
-
-    const omega = atan2(sin_omega, cos_omega);
-
-    var v01 = t;
-    v01 = xorInt(andInt(v01, f32x4_mask2), f32x4_sign_mask1);
-    v01 = f32x4(1.0, 0.0, 0.0, 0.0) + v01;
-
-    var s0 = sin(v01 * omega) / sin_omega;
-    s0 = select(cos_omega < splat(F32x4, 1.0 - 0.00001), s0, v01);
-
-    var s1 = swizzle(s0, .y, .y, .y, .y);
-    s0 = swizzle(s0, .x, .x, .x, .x);
-
-    return q0 * s0 + sign * q1 * s1;
-}
-test "zmath.quaternion.slerp" {
-    const from = f32x4(0.0, 0.0, 0.0, 1.0);
-    const to = f32x4(0.5, 0.5, -0.5, 0.5);
-    const result = slerp(from, to, 0.5);
-    try expect(approxEqAbs(result, f32x4(0.28867513, 0.28867513, -0.28867513, 0.86602540), 0.0001));
-}
-
-pub fn quatFromRollPitchYaw(pitch: f32, yaw: f32, roll: f32) Quat {
-    return quatFromRollPitchYawV(f32x4(pitch, yaw, roll, 0.0));
-}
-pub fn quatFromRollPitchYawV(angles: Vec) Quat { // | pitch | yaw | roll | 0 |
-    const sc = sincos(splat(Vec, 0.5) * angles);
-    const p0 = @shuffle(f32, sc[1], sc[0], [4]i32{ ~@as(i32, 0), 0, 0, 0 });
-    const p1 = @shuffle(f32, sc[0], sc[1], [4]i32{ ~@as(i32, 0), 0, 0, 0 });
-    const y0 = @shuffle(f32, sc[1], sc[0], [4]i32{ 1, ~@as(i32, 1), 1, 1 });
-    const y1 = @shuffle(f32, sc[0], sc[1], [4]i32{ 1, ~@as(i32, 1), 1, 1 });
-    const r0 = @shuffle(f32, sc[1], sc[0], [4]i32{ 2, 2, ~@as(i32, 2), 2 });
-    const r1 = @shuffle(f32, sc[0], sc[1], [4]i32{ 2, 2, ~@as(i32, 2), 2 });
-    const q1 = p1 * f32x4(1.0, -1.0, -1.0, 1.0) * y1;
-    const q0 = p0 * y0 * r0;
-    return mulAdd(q1, r1, q0);
-}
-test "zmath.quaternion.quatFromRollPitchYawV" {
-    {
-        const m0 = quatToMat(quatFromRollPitchYawV(f32x4(0.25 * math.pi, 0.0, 0.0, 0.0)));
-        const m1 = rotationX(0.25 * math.pi);
-        try expect(approxEqAbs(m0[0], m1[0], 0.0001));
-        try expect(approxEqAbs(m0[1], m1[1], 0.0001));
-        try expect(approxEqAbs(m0[2], m1[2], 0.0001));
-        try expect(approxEqAbs(m0[3], m1[3], 0.0001));
-    }
-    {
-        const m0 = quatToMat(quatFromRollPitchYaw(0.1 * math.pi, 0.2 * math.pi, 0.3 * math.pi));
-        const m1 = mul(
-            rotationZ(0.3 * math.pi),
-            mul(rotationX(0.1 * math.pi), rotationY(0.2 * math.pi)),
-        );
-        try expect(approxEqAbs(m0[0], m1[0], 0.0001));
-        try expect(approxEqAbs(m0[1], m1[1], 0.0001));
-        try expect(approxEqAbs(m0[2], m1[2], 0.0001));
-        try expect(approxEqAbs(m0[3], m1[3], 0.0001));
-    }
-}
-// ------------------------------------------------------------------------------
-//
-// 6. Color functions
-//
-// ------------------------------------------------------------------------------
-pub fn adjustSaturation(color: F32x4, saturation: f32) F32x4 {
-    const luminance = dot3(f32x4(0.2125, 0.7154, 0.0721, 0.0), color);
-    var result = mulAdd(color - luminance, f32x4s(saturation), luminance);
-    result[3] = color[3];
-    return result;
-}
-
-pub fn adjustContrast(color: F32x4, contrast: f32) F32x4 {
-    var result = mulAdd(color - f32x4s(0.5), f32x4s(contrast), f32x4s(0.5));
-    result[3] = color[3];
-    return result;
-}
-
-pub fn rgbToHsl(rgb: F32x4) F32x4 {
-    const r = swizzle(rgb, .x, .x, .x, .x);
-    const g = swizzle(rgb, .y, .y, .y, .y);
-    const b = swizzle(rgb, .z, .z, .z, .z);
-
-    const minv = min(r, min(g, b));
-    const maxv = max(r, max(g, b));
-
-    const l = (minv + maxv) * f32x4s(0.5);
-    const d = maxv - minv;
-    const la = select(boolx4(true, true, true, false), l, rgb);
-
-    if (all(d < f32x4s(math.f32_epsilon), 3)) {
-        return select(boolx4(true, true, false, false), f32x4s(0.0), la);
-    } else {
-        var s: F32x4 = undefined;
-        var h: F32x4 = undefined;
-
-        const d2 = minv + maxv;
-
-        if (all(l > f32x4s(0.5), 3)) {
-            s = d / (f32x4s(2.0) - d2);
-        } else {
-            s = d / d2;
-        }
-
-        if (all(r == maxv, 3)) {
-            h = (g - b) / d;
-        } else if (all(g == maxv, 3)) {
-            h = f32x4s(2.0) + (b - r) / d;
-        } else {
-            h = f32x4s(4.0) + (r - g) / d;
-        }
-
-        h /= f32x4s(6.0);
-
-        if (all(h < f32x4s(0.0), 3)) {
-            h += f32x4s(1.0);
-        }
-
-        const lha = select(boolx4(true, true, false, false), h, la);
-        return select(boolx4(true, false, true, true), lha, s);
-    }
-}
-test "zmath.color.rgbToHsl" {
-    try expect(approxEqAbs(rgbToHsl(f32x4(0.2, 0.4, 0.8, 1.0)), f32x4(0.6111, 0.6, 0.5, 1.0), 0.0001));
-    try expect(approxEqAbs(rgbToHsl(f32x4(1.0, 0.0, 0.0, 0.5)), f32x4(0.0, 1.0, 0.5, 0.5), 0.0001));
-    try expect(approxEqAbs(rgbToHsl(f32x4(0.0, 1.0, 0.0, 0.25)), f32x4(0.3333, 1.0, 0.5, 0.25), 0.0001));
-    try expect(approxEqAbs(rgbToHsl(f32x4(0.0, 0.0, 1.0, 1.0)), f32x4(0.6666, 1.0, 0.5, 1.0), 0.0001));
-    try expect(approxEqAbs(rgbToHsl(f32x4(0.0, 0.0, 0.0, 1.0)), f32x4(0.0, 0.0, 0.0, 1.0), 0.0001));
-    try expect(approxEqAbs(rgbToHsl(f32x4(1.0, 1.0, 1.0, 1.0)), f32x4(0.0, 0.0, 1.0, 1.0), 0.0001));
-}
-
-fn hueToClr(p: F32x4, q: F32x4, h: F32x4) F32x4 {
-    var t = h;
-
-    if (all(t < f32x4s(0.0), 3))
-        t += f32x4s(1.0);
-
-    if (all(t > f32x4s(1.0), 3))
-        t -= f32x4s(1.0);
-
-    if (all(t < f32x4s(1.0 / 6.0), 3))
-        return mulAdd(q - p, f32x4s(6.0) * t, p);
-
-    if (all(t < f32x4s(0.5), 3))
-        return q;
-
-    if (all(t < f32x4s(2.0 / 3.0), 3))
-        return mulAdd(q - p, f32x4s(6.0) * (f32x4s(2.0 / 3.0) - t), p);
-
-    return p;
-}
-
-pub fn hslToRgb(hsl: F32x4) F32x4 {
-    const s = swizzle(hsl, .y, .y, .y, .y);
-    const l = swizzle(hsl, .z, .z, .z, .z);
-
-    if (all(isNearEqual(s, f32x4s(0.0), f32x4s(math.f32_epsilon)), 3)) {
-        return select(boolx4(true, true, true, false), l, hsl);
-    } else {
-        const h = swizzle(hsl, .x, .x, .x, .x);
-        var q: F32x4 = undefined;
-        if (all(l < f32x4s(0.5), 3)) {
-            q = l * (f32x4s(1.0) + s);
-        } else {
-            q = (l + s) - (l * s);
-        }
-
-        const p = f32x4s(2.0) * l - q;
-
-        const r = hueToClr(p, q, h + f32x4s(1.0 / 3.0));
-        const g = hueToClr(p, q, h);
-        const b = hueToClr(p, q, h - f32x4s(1.0 / 3.0));
-
-        const rg = select(boolx4(true, false, false, false), r, g);
-        const ba = select(boolx4(true, true, true, false), b, hsl);
-        return select(boolx4(true, true, false, false), rg, ba);
-    }
-}
-test "zmath.color.hslToRgb" {
-    try expect(approxEqAbs(f32x4(0.2, 0.4, 0.8, 1.0), hslToRgb(f32x4(0.6111, 0.6, 0.5, 1.0)), 0.0001));
-    try expect(approxEqAbs(f32x4(1.0, 0.0, 0.0, 0.5), hslToRgb(f32x4(0.0, 1.0, 0.5, 0.5)), 0.0001));
-    try expect(approxEqAbs(f32x4(0.0, 1.0, 0.0, 0.25), hslToRgb(f32x4(0.3333, 1.0, 0.5, 0.25)), 0.0005));
-    try expect(approxEqAbs(f32x4(0.0, 0.0, 1.0, 1.0), hslToRgb(f32x4(0.6666, 1.0, 0.5, 1.0)), 0.0005));
-    try expect(approxEqAbs(f32x4(0.0, 0.0, 0.0, 1.0), hslToRgb(f32x4(0.0, 0.0, 0.0, 1.0)), 0.0001));
-    try expect(approxEqAbs(f32x4(1.0, 1.0, 1.0, 1.0), hslToRgb(f32x4(0.0, 0.0, 1.0, 1.0)), 0.0001));
-    try expect(approxEqAbs(hslToRgb(rgbToHsl(f32x4(1.0, 1.0, 1.0, 1.0))), f32x4(1.0, 1.0, 1.0, 1.0), 0.0005));
-    try expect(approxEqAbs(
-        hslToRgb(rgbToHsl(f32x4(0.82198, 0.1839, 0.632, 1.0))),
-        f32x4(0.82198, 0.1839, 0.632, 1.0),
-        0.0005,
-    ));
-    try expect(approxEqAbs(
-        rgbToHsl(hslToRgb(f32x4(0.82198, 0.1839, 0.632, 1.0))),
-        f32x4(0.82198, 0.1839, 0.632, 1.0),
-        0.0005,
-    ));
-    try expect(approxEqAbs(
-        rgbToHsl(hslToRgb(f32x4(0.1839, 0.82198, 0.632, 1.0))),
-        f32x4(0.1839, 0.82198, 0.632, 1.0),
-        0.0005,
-    ));
-    try expect(approxEqAbs(
-        hslToRgb(rgbToHsl(f32x4(0.1839, 0.632, 0.82198, 1.0))),
-        f32x4(0.1839, 0.632, 0.82198, 1.0),
-        0.0005,
-    ));
-}
-
-pub fn rgbToHsv(rgb: F32x4) F32x4 {
-    const r = swizzle(rgb, .x, .x, .x, .x);
-    const g = swizzle(rgb, .y, .y, .y, .y);
-    const b = swizzle(rgb, .z, .z, .z, .z);
-
-    const minv = min(r, min(g, b));
-    const v = max(r, max(g, b));
-    const d = v - minv;
-    const s = if (all(isNearEqual(v, f32x4s(0.0), f32x4s(math.f32_epsilon)), 3)) f32x4s(0.0) else d / v;
-
-    if (all(d < f32x4s(math.f32_epsilon), 3)) {
-        const hv = select(boolx4(true, false, false, false), f32x4s(0.0), v);
-        const hva = select(boolx4(true, true, true, false), hv, rgb);
-        return select(boolx4(true, false, true, true), hva, s);
-    } else {
-        var h: F32x4 = undefined;
-        if (all(r == v, 3)) {
-            h = (g - b) / d;
-            if (all(g < b, 3))
-                h += f32x4s(6.0);
-        } else if (all(g == v, 3)) {
-            h = f32x4s(2.0) + (b - r) / d;
-        } else {
-            h = f32x4s(4.0) + (r - g) / d;
-        }
-
-        h /= f32x4s(6.0);
-        const hv = select(boolx4(true, false, false, false), h, v);
-        const hva = select(boolx4(true, true, true, false), hv, rgb);
-        return select(boolx4(true, false, true, true), hva, s);
-    }
-}
-test "zmath.color.rgbToHsv" {
-    try expect(approxEqAbs(rgbToHsv(f32x4(0.2, 0.4, 0.8, 1.0)), f32x4(0.6111, 0.75, 0.8, 1.0), 0.0001));
-    try expect(approxEqAbs(rgbToHsv(f32x4(0.4, 0.2, 0.8, 1.0)), f32x4(0.7222, 0.75, 0.8, 1.0), 0.0001));
-    try expect(approxEqAbs(rgbToHsv(f32x4(0.4, 0.8, 0.2, 1.0)), f32x4(0.2777, 0.75, 0.8, 1.0), 0.0001));
-    try expect(approxEqAbs(rgbToHsv(f32x4(1.0, 0.0, 0.0, 0.5)), f32x4(0.0, 1.0, 1.0, 0.5), 0.0001));
-    try expect(approxEqAbs(rgbToHsv(f32x4(0.0, 1.0, 0.0, 0.25)), f32x4(0.3333, 1.0, 1.0, 0.25), 0.0001));
-    try expect(approxEqAbs(rgbToHsv(f32x4(0.0, 0.0, 1.0, 1.0)), f32x4(0.6666, 1.0, 1.0, 1.0), 0.0001));
-    try expect(approxEqAbs(rgbToHsv(f32x4(0.0, 0.0, 0.0, 1.0)), f32x4(0.0, 0.0, 0.0, 1.0), 0.0001));
-    try expect(approxEqAbs(rgbToHsv(f32x4(1.0, 1.0, 1.0, 1.0)), f32x4(0.0, 0.0, 1.0, 1.0), 0.0001));
-}
-
-pub fn hsvToRgb(hsv: F32x4) F32x4 {
-    const h = swizzle(hsv, .x, .x, .x, .x);
-    const s = swizzle(hsv, .y, .y, .y, .y);
-    const v = swizzle(hsv, .z, .z, .z, .z);
-
-    const h6 = h * f32x4s(6.0);
-    const i = floor(h6);
-    const f = h6 - i;
-
-    const p = v * (f32x4s(1.0) - s);
-    const q = v * (f32x4s(1.0) - f * s);
-    const t = v * (f32x4s(1.0) - (f32x4s(1.0) - f) * s);
-
-    const ii = @floatToInt(i32, mod(i, f32x4s(6.0))[0]);
-    const rgb = switch (ii) {
-        0 => blk: {
-            const vt = select(boolx4(true, false, false, false), v, t);
-            break :blk select(boolx4(true, true, false, false), vt, p);
-        },
-        1 => blk: {
-            const qv = select(boolx4(true, false, false, false), q, v);
-            break :blk select(boolx4(true, true, false, false), qv, p);
-        },
-        2 => blk: {
-            const pv = select(boolx4(true, false, false, false), p, v);
-            break :blk select(boolx4(true, true, false, false), pv, t);
-        },
-        3 => blk: {
-            const pq = select(boolx4(true, false, false, false), p, q);
-            break :blk select(boolx4(true, true, false, false), pq, v);
-        },
-        4 => blk: {
-            const tp = select(boolx4(true, false, false, false), t, p);
-            break :blk select(boolx4(true, true, false, false), tp, v);
-        },
-        5 => blk: {
-            const vp = select(boolx4(true, false, false, false), v, p);
-            break :blk select(boolx4(true, true, false, false), vp, q);
-        },
-        else => unreachable,
-    };
-    return select(boolx4(true, true, true, false), rgb, hsv);
-}
-test "zmath.color.hsvToRgb" {
-    const epsilon = 0.0005;
-    try expect(approxEqAbs(f32x4(0.2, 0.4, 0.8, 1.0), hsvToRgb(f32x4(0.6111, 0.75, 0.8, 1.0)), epsilon));
-    try expect(approxEqAbs(f32x4(0.4, 0.2, 0.8, 1.0), hsvToRgb(f32x4(0.7222, 0.75, 0.8, 1.0)), epsilon));
-    try expect(approxEqAbs(f32x4(0.4, 0.8, 0.2, 1.0), hsvToRgb(f32x4(0.2777, 0.75, 0.8, 1.0)), epsilon));
-    try expect(approxEqAbs(f32x4(1.0, 0.0, 0.0, 0.5), hsvToRgb(f32x4(0.0, 1.0, 1.0, 0.5)), epsilon));
-    try expect(approxEqAbs(f32x4(0.0, 1.0, 0.0, 0.25), hsvToRgb(f32x4(0.3333, 1.0, 1.0, 0.25)), epsilon));
-    try expect(approxEqAbs(f32x4(0.0, 0.0, 1.0, 1.0), hsvToRgb(f32x4(0.6666, 1.0, 1.0, 1.0)), epsilon));
-    try expect(approxEqAbs(f32x4(0.0, 0.0, 0.0, 1.0), hsvToRgb(f32x4(0.0, 0.0, 0.0, 1.0)), epsilon));
-    try expect(approxEqAbs(f32x4(1.0, 1.0, 1.0, 1.0), hsvToRgb(f32x4(0.0, 0.0, 1.0, 1.0)), epsilon));
-    try expect(approxEqAbs(
-        hsvToRgb(rgbToHsv(f32x4(0.1839, 0.632, 0.82198, 1.0))),
-        f32x4(0.1839, 0.632, 0.82198, 1.0),
-        epsilon,
-    ));
-    try expect(approxEqAbs(
-        hsvToRgb(rgbToHsv(f32x4(0.82198, 0.1839, 0.632, 1.0))),
-        f32x4(0.82198, 0.1839, 0.632, 1.0),
-        epsilon,
-    ));
-    try expect(approxEqAbs(
-        rgbToHsv(hsvToRgb(f32x4(0.82198, 0.1839, 0.632, 1.0))),
-        f32x4(0.82198, 0.1839, 0.632, 1.0),
-        epsilon,
-    ));
-    try expect(approxEqAbs(
-        rgbToHsv(hsvToRgb(f32x4(0.1839, 0.82198, 0.632, 1.0))),
-        f32x4(0.1839, 0.82198, 0.632, 1.0),
-        epsilon,
-    ));
-}
-
-pub fn rgbToSrgb(rgb: F32x4) F32x4 {
-    const static = struct {
-        const cutoff = f32x4(0.0031308, 0.0031308, 0.0031308, 1.0);
-        const linear = f32x4(12.92, 12.92, 12.92, 1.0);
-        const scale = f32x4(1.055, 1.055, 1.055, 1.0);
-        const bias = f32x4(0.055, 0.055, 0.055, 1.0);
-        const rgamma = 1.0 / 2.4;
-    };
-    var v = saturate(rgb);
-    const v0 = v * static.linear;
-    const v1 = static.scale * f32x4(
-        math.pow(f32, v[0], static.rgamma),
-        math.pow(f32, v[1], static.rgamma),
-        math.pow(f32, v[2], static.rgamma),
-        v[3],
-    ) - static.bias;
-    v = select(v < static.cutoff, v0, v1);
-    return select(boolx4(true, true, true, false), v, rgb);
-}
-test "zmath.color.rgbToSrgb" {
-    const epsilon = 0.001;
-    try expect(approxEqAbs(rgbToSrgb(f32x4(0.2, 0.4, 0.8, 1.0)), f32x4(0.484, 0.665, 0.906, 1.0), epsilon));
-}
-
-pub fn srgbToRgb(srgb: F32x4) F32x4 {
-    const static = struct {
-        const cutoff = f32x4(0.04045, 0.04045, 0.04045, 1.0);
-        const rlinear = f32x4(1.0 / 12.92, 1.0 / 12.92, 1.0 / 12.92, 1.0);
-        const scale = f32x4(1.0 / 1.055, 1.0 / 1.055, 1.0 / 1.055, 1.0);
-        const bias = f32x4(0.055, 0.055, 0.055, 1.0);
-        const gamma = 2.4;
-    };
-    var v = saturate(srgb);
-    const v0 = v * static.rlinear;
-    var v1 = static.scale * (v + static.bias);
-    v1 = f32x4(
-        math.pow(f32, v1[0], static.gamma),
-        math.pow(f32, v1[1], static.gamma),
-        math.pow(f32, v1[2], static.gamma),
-        v1[3],
-    );
-    v = select(v > static.cutoff, v1, v0);
-    return select(boolx4(true, true, true, false), v, srgb);
-}
-test "zmath.color.srgbToRgb" {
-    const epsilon = 0.0007;
-    try expect(approxEqAbs(f32x4(0.2, 0.4, 0.8, 1.0), srgbToRgb(f32x4(0.484, 0.665, 0.906, 1.0)), epsilon));
-    try expect(approxEqAbs(
-        rgbToSrgb(srgbToRgb(f32x4(0.1839, 0.82198, 0.632, 1.0))),
-        f32x4(0.1839, 0.82198, 0.632, 1.0),
-        epsilon,
-    ));
-}
-// ------------------------------------------------------------------------------
-//
-// X. Misc functions
-//
-// ------------------------------------------------------------------------------
-pub fn linePointDistance(linept0: Vec, linept1: Vec, pt: Vec) F32x4 {
-    const ptvec = pt - linept0;
-    const linevec = linept1 - linept0;
-    const scale = dot3(ptvec, linevec) / lengthSq3(linevec);
-    return length3(ptvec - linevec * scale);
-}
-test "zmath.linePointDistance" {
-    {
-        const linept0 = f32x4(-1.0, -2.0, -3.0, 1.0);
-        const linept1 = f32x4(1.0, 2.0, 3.0, 1.0);
-        const pt = f32x4(1.0, 1.0, 1.0, 1.0);
-        var v = linePointDistance(linept0, linept1, pt);
-        try expect(approxEqAbs(v, splat(F32x4, 0.654), 0.001));
-    }
-}
-
-fn sin32(v: f32) f32 {
-    var y = v - math.tau * @round(v * 1.0 / math.tau);
-
-    if (y > 0.5 * math.pi) {
-        y = math.pi - y;
-    } else if (y < -math.pi * 0.5) {
-        y = -math.pi - y;
-    }
-    const y2 = y * y;
-
-    // 11-degree minimax approximation
-    var sinv = mulAdd(@as(f32, -2.3889859e-08), y2, 2.7525562e-06);
-    sinv = mulAdd(sinv, y2, -0.00019840874);
-    sinv = mulAdd(sinv, y2, 0.0083333310);
-    sinv = mulAdd(sinv, y2, -0.16666667);
-    return y * mulAdd(sinv, y2, 1.0);
-}
-fn cos32(v: f32) f32 {
-    var y = v - math.tau * @round(v * 1.0 / math.tau);
-
-    const sign = blk: {
-        if (y > 0.5 * math.pi) {
-            y = math.pi - y;
-            break :blk @as(f32, -1.0);
-        } else if (y < -math.pi * 0.5) {
-            y = -math.pi - y;
-            break :blk @as(f32, -1.0);
-        } else {
-            break :blk @as(f32, 1.0);
-        }
-    };
-    const y2 = y * y;
-
-    // 10-degree minimax approximation
-    var cosv = mulAdd(@as(f32, -2.6051615e-07), y2, 2.4760495e-05);
-    cosv = mulAdd(cosv, y2, -0.0013888378);
-    cosv = mulAdd(cosv, y2, 0.041666638);
-    cosv = mulAdd(cosv, y2, -0.5);
-    return sign * mulAdd(cosv, y2, 1.0);
-}
-fn sincos32(v: f32) [2]f32 {
-    var y = v - math.tau * @round(v * 1.0 / math.tau);
-
-    const sign = blk: {
-        if (y > 0.5 * math.pi) {
-            y = math.pi - y;
-            break :blk @as(f32, -1.0);
-        } else if (y < -math.pi * 0.5) {
-            y = -math.pi - y;
-            break :blk @as(f32, -1.0);
-        } else {
-            break :blk @as(f32, 1.0);
-        }
-    };
-    const y2 = y * y;
-
-    // 11-degree minimax approximation
-    var sinv = mulAdd(@as(f32, -2.3889859e-08), y2, 2.7525562e-06);
-    sinv = mulAdd(sinv, y2, -0.00019840874);
-    sinv = mulAdd(sinv, y2, 0.0083333310);
-    sinv = mulAdd(sinv, y2, -0.16666667);
-    sinv = y * mulAdd(sinv, y2, 1.0);
-
-    // 10-degree minimax approximation
-    var cosv = mulAdd(@as(f32, -2.6051615e-07), y2, 2.4760495e-05);
-    cosv = mulAdd(cosv, y2, -0.0013888378);
-    cosv = mulAdd(cosv, y2, 0.041666638);
-    cosv = mulAdd(cosv, y2, -0.5);
-    cosv = sign * mulAdd(cosv, y2, 1.0);
-
-    return .{ sinv, cosv };
-}
-test "zmath.sincos32" {
-    const epsilon = 0.0001;
-
-    try expect(math.isNan(sincos32(math.inf_f32)[0]));
-    try expect(math.isNan(sincos32(math.inf_f32)[1]));
-    try expect(math.isNan(sincos32(-math.inf_f32)[0]));
-    try expect(math.isNan(sincos32(-math.inf_f32)[1]));
-    try expect(math.isNan(sincos32(math.nan_f32)[0]));
-    try expect(math.isNan(sincos32(-math.nan_f32)[1]));
-
-    try expect(math.isNan(sin32(math.inf_f32)));
-    try expect(math.isNan(cos32(math.inf_f32)));
-    try expect(math.isNan(sin32(-math.inf_f32)));
-    try expect(math.isNan(cos32(-math.inf_f32)));
-    try expect(math.isNan(sin32(math.nan_f32)));
-    try expect(math.isNan(cos32(-math.nan_f32)));
-
-    var f: f32 = -100.0;
-    var i: u32 = 0;
-    while (i < 100) : (i += 1) {
-        const sc = sincos32(f);
-        const s0 = sin32(f);
-        const c0 = cos32(f);
-        const s = @sin(f);
-        const c = @cos(f);
-        try expect(math.approxEqAbs(f32, sc[0], s, epsilon));
-        try expect(math.approxEqAbs(f32, sc[1], c, epsilon));
-        try expect(math.approxEqAbs(f32, s0, s, epsilon));
-        try expect(math.approxEqAbs(f32, c0, c, epsilon));
-        f += 0.12345 * @intToFloat(f32, i);
-    }
-}
-
-fn asin32(v: f32) f32 {
-    const x = @fabs(v);
-    var omx = 1.0 - x;
-    if (omx < 0.0) {
-        omx = 0.0;
-    }
-    const root = @sqrt(omx);
-
-    // 7-degree minimax approximation
-    var result = mulAdd(@as(f32, -0.0012624911), x, 0.0066700901);
-    result = mulAdd(result, x, -0.0170881256);
-    result = mulAdd(result, x, 0.0308918810);
-    result = mulAdd(result, x, -0.0501743046);
-    result = mulAdd(result, x, 0.0889789874);
-    result = mulAdd(result, x, -0.2145988016);
-    result = root * mulAdd(result, x, 1.5707963050);
-
-    return if (v >= 0.0) 0.5 * math.pi - result else result - 0.5 * math.pi;
-}
-test "zmath.asin32" {
-    const epsilon = 0.0001;
-
-    try expect(math.approxEqAbs(f32, asin(@as(f32, -1.1)), -0.5 * math.pi, epsilon));
-    try expect(math.approxEqAbs(f32, asin(@as(f32, 1.1)), 0.5 * math.pi, epsilon));
-    try expect(math.approxEqAbs(f32, asin(@as(f32, -1000.1)), -0.5 * math.pi, epsilon));
-    try expect(math.approxEqAbs(f32, asin(@as(f32, 100000.1)), 0.5 * math.pi, epsilon));
-    try expect(math.isNan(asin(math.inf_f32)));
-    try expect(math.isNan(asin(-math.inf_f32)));
-    try expect(math.isNan(asin(math.nan_f32)));
-    try expect(math.isNan(asin(-math.nan_f32)));
-
-    try expect(approxEqAbs(asin(splat(F32x8, -100.0)), splat(F32x8, -0.5 * math.pi), epsilon));
-    try expect(approxEqAbs(asin(splat(F32x16, 100.0)), splat(F32x16, 0.5 * math.pi), epsilon));
-    try expect(all(isNan(asin(splat(F32x4, math.inf_f32))), 0) == true);
-    try expect(all(isNan(asin(splat(F32x4, -math.inf_f32))), 0) == true);
-    try expect(all(isNan(asin(splat(F32x4, math.nan_f32))), 0) == true);
-    try expect(all(isNan(asin(splat(F32x4, math.qnan_f32))), 0) == true);
-
-    var f: f32 = -1.0;
-    var i: u32 = 0;
-    while (i < 8) : (i += 1) {
-        const r0 = asin32(f);
-        const r1 = math.asin(f);
-        const r4 = asin(splat(F32x4, f));
-        const r8 = asin(splat(F32x8, f));
-        const r16 = asin(splat(F32x16, f));
-        try expect(math.approxEqAbs(f32, r0, r1, epsilon));
-        try expect(approxEqAbs(r4, splat(F32x4, r1), epsilon));
-        try expect(approxEqAbs(r8, splat(F32x8, r1), epsilon));
-        try expect(approxEqAbs(r16, splat(F32x16, r1), epsilon));
-        f += 0.09 * @intToFloat(f32, i);
-    }
-}
-
-fn acos32(v: f32) f32 {
-    const x = @fabs(v);
-    var omx = 1.0 - x;
-    if (omx < 0.0) {
-        omx = 0.0;
-    }
-    const root = @sqrt(omx);
-
-    // 7-degree minimax approximation
-    var result = mulAdd(@as(f32, -0.0012624911), x, 0.0066700901);
-    result = mulAdd(result, x, -0.0170881256);
-    result = mulAdd(result, x, 0.0308918810);
-    result = mulAdd(result, x, -0.0501743046);
-    result = mulAdd(result, x, 0.0889789874);
-    result = mulAdd(result, x, -0.2145988016);
-    result = root * mulAdd(result, x, 1.5707963050);
-
-    return if (v >= 0.0) result else math.pi - result;
-}
-test "zmath.acos32" {
-    const epsilon = 0.1;
-
-    try expect(math.approxEqAbs(f32, acos(@as(f32, -1.1)), math.pi, epsilon));
-    try expect(math.approxEqAbs(f32, acos(@as(f32, -10000.1)), math.pi, epsilon));
-    try expect(math.approxEqAbs(f32, acos(@as(f32, 1.1)), 0.0, epsilon));
-    try expect(math.approxEqAbs(f32, acos(@as(f32, 1000.1)), 0.0, epsilon));
-    try expect(math.isNan(acos(math.inf_f32)));
-    try expect(math.isNan(acos(-math.inf_f32)));
-    try expect(math.isNan(acos(math.nan_f32)));
-    try expect(math.isNan(acos(-math.nan_f32)));
-
-    try expect(approxEqAbs(acos(splat(F32x8, -100.0)), splat(F32x8, math.pi), epsilon));
-    try expect(approxEqAbs(acos(splat(F32x16, 100.0)), splat(F32x16, 0.0), epsilon));
-    try expect(all(isNan(acos(splat(F32x4, math.inf_f32))), 0) == true);
-    try expect(all(isNan(acos(splat(F32x4, -math.inf_f32))), 0) == true);
-    try expect(all(isNan(acos(splat(F32x4, math.nan_f32))), 0) == true);
-    try expect(all(isNan(acos(splat(F32x4, math.qnan_f32))), 0) == true);
-
-    var f: f32 = -1.0;
-    var i: u32 = 0;
-    while (i < 8) : (i += 1) {
-        const r0 = acos32(f);
-        const r1 = math.acos(f);
-        const r4 = acos(splat(F32x4, f));
-        const r8 = acos(splat(F32x8, f));
-        const r16 = acos(splat(F32x16, f));
-        try expect(math.approxEqAbs(f32, r0, r1, epsilon));
-        try expect(approxEqAbs(r4, splat(F32x4, r1), epsilon));
-        try expect(approxEqAbs(r8, splat(F32x8, r1), epsilon));
-        try expect(approxEqAbs(r16, splat(F32x16, r1), epsilon));
-        f += 0.09 * @intToFloat(f32, i);
-    }
-}
-
-pub fn modAngle32(in_angle: f32) f32 {
-    const angle = in_angle + math.pi;
-    var temp: f32 = @fabs(angle);
-    temp = temp - (2.0 * math.pi * @intToFloat(f32, @floatToInt(i32, temp / math.pi)));
-    temp = temp - math.pi;
-    if (angle < 0.0) {
-        temp = -temp;
-    }
-    return temp;
-}
-
-pub fn cmulSoa(re0: anytype, im0: anytype, re1: anytype, im1: anytype) [2]@TypeOf(re0, im0, re1, im1) {
-    const re0_re1 = re0 * re1;
-    const re0_im1 = re0 * im1;
-    return .{
-        mulAdd(-im0, im1, re0_re1), // re
-        mulAdd(re1, im0, re0_im1), // im
-    };
-}
-// ------------------------------------------------------------------------------
-//
-// FFT (implementation based on xdsp.h from DirectXMath)
-//
-// ------------------------------------------------------------------------------
-fn fftButterflyDit4_1(re0: *F32x4, im0: *F32x4) void {
-    const re0l = swizzle(re0.*, .x, .x, .y, .y);
-    const re0h = swizzle(re0.*, .z, .z, .w, .w);
-
-    const im0l = swizzle(im0.*, .x, .x, .y, .y);
-    const im0h = swizzle(im0.*, .z, .z, .w, .w);
-
-    const re_temp = mulAdd(re0h, f32x4(1.0, -1.0, 1.0, -1.0), re0l);
-    const im_temp = mulAdd(im0h, f32x4(1.0, -1.0, 1.0, -1.0), im0l);
-
-    const re_shuf0 = @shuffle(f32, re_temp, im_temp, [4]i32{ 2, 3, ~@as(i32, 2), ~@as(i32, 3) });
-    const re_shuf = swizzle(re_shuf0, .x, .w, .x, .w);
-    const im_shuf = swizzle(re_shuf0, .z, .y, .z, .y);
-
-    const re_templ = swizzle(re_temp, .x, .y, .x, .y);
-    const im_templ = swizzle(im_temp, .x, .y, .x, .y);
-
-    re0.* = mulAdd(re_shuf, f32x4(1.0, 1.0, -1.0, -1.0), re_templ);
-    im0.* = mulAdd(im_shuf, f32x4(1.0, -1.0, -1.0, 1.0), im_templ);
-}
-
-fn fftButterflyDit4_4(
-    re0: *F32x4,
-    re1: *F32x4,
-    re2: *F32x4,
-    re3: *F32x4,
-    im0: *F32x4,
-    im1: *F32x4,
-    im2: *F32x4,
-    im3: *F32x4,
-    unity_table_re: []const F32x4,
-    unity_table_im: []const F32x4,
-    stride: u32,
-    last: bool,
-) void {
-    const re_temp0 = re0.* + re2.*;
-    const im_temp0 = im0.* + im2.*;
-
-    const re_temp2 = re1.* + re3.*;
-    const im_temp2 = im1.* + im3.*;
-
-    const re_temp1 = re0.* - re2.*;
-    const im_temp1 = im0.* - im2.*;
-
-    const re_temp3 = re1.* - re3.*;
-    const im_temp3 = im1.* - im3.*;
-
-    var re_temp4 = re_temp0 + re_temp2;
-    var im_temp4 = im_temp0 + im_temp2;
-
-    var re_temp5 = re_temp1 + im_temp3;
-    var im_temp5 = im_temp1 - re_temp3;
-
-    var re_temp6 = re_temp0 - re_temp2;
-    var im_temp6 = im_temp0 - im_temp2;
-
-    var re_temp7 = re_temp1 - im_temp3;
-    var im_temp7 = im_temp1 + re_temp3;
-
-    {
-        const re_im = cmulSoa(re_temp5, im_temp5, unity_table_re[stride], unity_table_im[stride]);
-        re_temp5 = re_im[0];
-        im_temp5 = re_im[1];
-    }
-    {
-        const re_im = cmulSoa(re_temp6, im_temp6, unity_table_re[stride * 2], unity_table_im[stride * 2]);
-        re_temp6 = re_im[0];
-        im_temp6 = re_im[1];
-    }
-    {
-        const re_im = cmulSoa(re_temp7, im_temp7, unity_table_re[stride * 3], unity_table_im[stride * 3]);
-        re_temp7 = re_im[0];
-        im_temp7 = re_im[1];
-    }
-
-    if (last) {
-        fftButterflyDit4_1(&re_temp4, &im_temp4);
-        fftButterflyDit4_1(&re_temp5, &im_temp5);
-        fftButterflyDit4_1(&re_temp6, &im_temp6);
-        fftButterflyDit4_1(&re_temp7, &im_temp7);
-    }
-
-    re0.* = re_temp4;
-    im0.* = im_temp4;
-
-    re1.* = re_temp5;
-    im1.* = im_temp5;
-
-    re2.* = re_temp6;
-    im2.* = im_temp6;
-
-    re3.* = re_temp7;
-    im3.* = im_temp7;
-}
-
-fn fft4(re: []F32x4, im: []F32x4, count: u32) void {
-    assert(std.math.isPowerOfTwo(count));
-    assert(re.len >= count);
-    assert(im.len >= count);
-
-    var index: u32 = 0;
-    while (index < count) : (index += 1) {
-        fftButterflyDit4_1(&re[index], &im[index]);
-    }
-}
-test "zmath.fft4" {
-    const epsilon = 0.0001;
-    var re = [_]F32x4{f32x4(1.0, 2.0, 3.0, 4.0)};
-    var im = [_]F32x4{f32x4s(0.0)};
-    fft4(re[0..], im[0..], 1);
-
-    var re_uns: [1]F32x4 = undefined;
-    var im_uns: [1]F32x4 = undefined;
-    fftUnswizzle(re[0..], re_uns[0..]);
-    fftUnswizzle(im[0..], im_uns[0..]);
-
-    try expect(approxEqAbs(re_uns[0], f32x4(10.0, -2.0, -2.0, -2.0), epsilon));
-    try expect(approxEqAbs(im_uns[0], f32x4(0.0, 2.0, 0.0, -2.0), epsilon));
-}
-
-fn fft8(re: []F32x4, im: []F32x4, count: u32) void {
-    assert(std.math.isPowerOfTwo(count));
-    assert(re.len >= 2 * count);
-    assert(im.len >= 2 * count);
-
-    var index: u32 = 0;
-    while (index < count) : (index += 1) {
-        var pre = re[index * 2 ..];
-        var pim = im[index * 2 ..];
-
-        var odds_re = @shuffle(f32, pre[0], pre[1], [4]i32{ 1, 3, ~@as(i32, 1), ~@as(i32, 3) });
-        var evens_re = @shuffle(f32, pre[0], pre[1], [4]i32{ 0, 2, ~@as(i32, 0), ~@as(i32, 2) });
-        var odds_im = @shuffle(f32, pim[0], pim[1], [4]i32{ 1, 3, ~@as(i32, 1), ~@as(i32, 3) });
-        var evens_im = @shuffle(f32, pim[0], pim[1], [4]i32{ 0, 2, ~@as(i32, 0), ~@as(i32, 2) });
-        fftButterflyDit4_1(&odds_re, &odds_im);
-        fftButterflyDit4_1(&evens_re, &evens_im);
-
-        {
-            const re_im = cmulSoa(
-                odds_re,
-                odds_im,
-                f32x4(1.0, 0.70710677, 0.0, -0.70710677),
-                f32x4(0.0, -0.70710677, -1.0, -0.70710677),
-            );
-            pre[0] = evens_re + re_im[0];
-            pim[0] = evens_im + re_im[1];
-        }
-        {
-            const re_im = cmulSoa(
-                odds_re,
-                odds_im,
-                f32x4(-1.0, -0.70710677, 0.0, 0.70710677),
-                f32x4(0.0, 0.70710677, 1.0, 0.70710677),
-            );
-            pre[1] = evens_re + re_im[0];
-            pim[1] = evens_im + re_im[1];
-        }
-    }
-}
-test "zmath.fft8" {
-    const epsilon = 0.0001;
-    var re = [_]F32x4{ f32x4(1.0, 2.0, 3.0, 4.0), f32x4(5.0, 6.0, 7.0, 8.0) };
-    var im = [_]F32x4{ f32x4s(0.0), f32x4s(0.0) };
-    fft8(re[0..], im[0..], 1);
-
-    var re_uns: [2]F32x4 = undefined;
-    var im_uns: [2]F32x4 = undefined;
-    fftUnswizzle(re[0..], re_uns[0..]);
-    fftUnswizzle(im[0..], im_uns[0..]);
-
-    try expect(approxEqAbs(re_uns[0], f32x4(36.0, -4.0, -4.0, -4.0), epsilon));
-    try expect(approxEqAbs(re_uns[1], f32x4(-4.0, -4.0, -4.0, -4.0), epsilon));
-    try expect(approxEqAbs(im_uns[0], f32x4(0.0, 9.656854, 4.0, 1.656854), epsilon));
-    try expect(approxEqAbs(im_uns[1], f32x4(0.0, -1.656854, -4.0, -9.656854), epsilon));
-}
-
-fn fft16(re: []F32x4, im: []F32x4, count: u32) void {
-    assert(std.math.isPowerOfTwo(count));
-    assert(re.len >= 4 * count);
-    assert(im.len >= 4 * count);
-
-    const static = struct {
-        const unity_table_re = [4]F32x4{
-            f32x4(1.0, 1.0, 1.0, 1.0),
-            f32x4(1.0, 0.92387950, 0.70710677, 0.38268343),
-            f32x4(1.0, 0.70710677, -4.3711388e-008, -0.70710677),
-            f32x4(1.0, 0.38268343, -0.70710677, -0.92387950),
-        };
-        const unity_table_im = [4]F32x4{
-            f32x4(-0.0, -0.0, -0.0, -0.0),
-            f32x4(-0.0, -0.38268343, -0.70710677, -0.92387950),
-            f32x4(-0.0, -0.70710677, -1.0, -0.70710677),
-            f32x4(-0.0, -0.92387950, -0.70710677, 0.38268343),
-        };
-    };
-
-    var index: u32 = 0;
-    while (index < count) : (index += 1) {
-        fftButterflyDit4_4(
-            &re[index * 4],
-            &re[index * 4 + 1],
-            &re[index * 4 + 2],
-            &re[index * 4 + 3],
-            &im[index * 4],
-            &im[index * 4 + 1],
-            &im[index * 4 + 2],
-            &im[index * 4 + 3],
-            static.unity_table_re[0..],
-            static.unity_table_im[0..],
-            1,
-            true,
-        );
-    }
-}
-test "zmath.fft16" {
-    const epsilon = 0.0001;
-    var re = [_]F32x4{
-        f32x4(1.0, 2.0, 3.0, 4.0),
-        f32x4(5.0, 6.0, 7.0, 8.0),
-        f32x4(9.0, 10.0, 11.0, 12.0),
-        f32x4(13.0, 14.0, 15.0, 16.0),
-    };
-    var im = [_]F32x4{ f32x4s(0.0), f32x4s(0.0), f32x4s(0.0), f32x4s(0.0) };
-    fft16(re[0..], im[0..], 1);
-
-    var re_uns: [4]F32x4 = undefined;
-    var im_uns: [4]F32x4 = undefined;
-    fftUnswizzle(re[0..], re_uns[0..]);
-    fftUnswizzle(im[0..], im_uns[0..]);
-
-    try expect(approxEqAbs(re_uns[0], f32x4(136.0, -8.0, -8.0, -8.0), epsilon));
-    try expect(approxEqAbs(re_uns[1], f32x4(-8.0, -8.0, -8.0, -8.0), epsilon));
-    try expect(approxEqAbs(re_uns[2], f32x4(-8.0, -8.0, -8.0, -8.0), epsilon));
-    try expect(approxEqAbs(re_uns[3], f32x4(-8.0, -8.0, -8.0, -8.0), epsilon));
-    try expect(approxEqAbs(im_uns[0], f32x4(0.0, 40.218716, 19.313708, 11.972846), epsilon));
-    try expect(approxEqAbs(im_uns[1], f32x4(8.0, 5.345429, 3.313708, 1.591299), epsilon));
-    try expect(approxEqAbs(im_uns[2], f32x4(0.0, -1.591299, -3.313708, -5.345429), epsilon));
-    try expect(approxEqAbs(im_uns[3], f32x4(-8.0, -11.972846, -19.313708, -40.218716), epsilon));
-}
-
-fn fftN(re: []F32x4, im: []F32x4, unity_table: []const F32x4, length: u32, count: u32) void {
-    assert(length > 16);
-    assert(std.math.isPowerOfTwo(length));
-    assert(std.math.isPowerOfTwo(count));
-    assert(re.len >= length * count / 4);
-    assert(re.len == im.len);
-
-    const total = count * length;
-    const total_vectors = total / 4;
-    const stage_vectors = length / 4;
-    const stage_vectors_mask = stage_vectors - 1;
-    const stride = length / 16;
-    const stride_mask = stride - 1;
-    const stride_inv_mask = ~stride_mask;
-
-    var unity_table_re = unity_table;
-    var unity_table_im = unity_table[length / 4 ..];
-
-    var index: u32 = 0;
-    while (index < total_vectors / 4) : (index += 1) {
-        const n = (index & stride_inv_mask) * 4 + (index & stride_mask);
-        fftButterflyDit4_4(
-            &re[n],
-            &re[n + stride],
-            &re[n + stride * 2],
-            &re[n + stride * 3],
-            &im[n],
-            &im[n + stride],
-            &im[n + stride * 2],
-            &im[n + stride * 3],
-            unity_table_re[(n & stage_vectors_mask)..],
-            unity_table_im[(n & stage_vectors_mask)..],
-            stride,
-            false,
-        );
-    }
-
-    if (length > 16 * 4) {
-        fftN(re, im, unity_table[(length / 2)..], length / 4, count * 4);
-    } else if (length == 16 * 4) {
-        fft16(re, im, count * 4);
-    } else if (length == 8 * 4) {
-        fft8(re, im, count * 4);
-    } else if (length == 4 * 4) {
-        fft4(re, im, count * 4);
-    }
-}
-test "zmath.fftN" {
-    var unity_table: [128]F32x4 = undefined;
-    const epsilon = 0.0001;
-
-    // 32 samples
-    {
-        var re = [_]F32x4{
-            f32x4(1.0, 2.0, 3.0, 4.0),     f32x4(5.0, 6.0, 7.0, 8.0),
-            f32x4(9.0, 10.0, 11.0, 12.0),  f32x4(13.0, 14.0, 15.0, 16.0),
-            f32x4(17.0, 18.0, 19.0, 20.0), f32x4(21.0, 22.0, 23.0, 24.0),
-            f32x4(25.0, 26.0, 27.0, 28.0), f32x4(29.0, 30.0, 31.0, 32.0),
-        };
-        var im = [_]F32x4{
-            f32x4s(0.0), f32x4s(0.0), f32x4s(0.0), f32x4s(0.0),
-            f32x4s(0.0), f32x4s(0.0), f32x4s(0.0), f32x4s(0.0),
-        };
-
-        fftInitUnityTable(unity_table[0..32]);
-        fft(re[0..], im[0..], unity_table[0..32]);
-
-        try expect(approxEqAbs(re[0], f32x4(528.0, -16.0, -16.0, -16.0), epsilon));
-        try expect(approxEqAbs(re[1], f32x4(-16.0, -16.0, -16.0, -16.0), epsilon));
-        try expect(approxEqAbs(re[2], f32x4(-16.0, -16.0, -16.0, -16.0), epsilon));
-        try expect(approxEqAbs(re[3], f32x4(-16.0, -16.0, -16.0, -16.0), epsilon));
-        try expect(approxEqAbs(re[4], f32x4(-16.0, -16.0, -16.0, -16.0), epsilon));
-        try expect(approxEqAbs(re[5], f32x4(-16.0, -16.0, -16.0, -16.0), epsilon));
-        try expect(approxEqAbs(re[6], f32x4(-16.0, -16.0, -16.0, -16.0), epsilon));
-        try expect(approxEqAbs(re[7], f32x4(-16.0, -16.0, -16.0, -16.0), epsilon));
-        try expect(approxEqAbs(im[0], f32x4(0.0, 162.450726, 80.437432, 52.744931), epsilon));
-        try expect(approxEqAbs(im[1], f32x4(38.627417, 29.933895, 23.945692, 19.496056), epsilon));
-        try expect(approxEqAbs(im[2], f32x4(16.0, 13.130861, 10.690858, 8.552178), epsilon));
-        try expect(approxEqAbs(im[3], f32x4(6.627417, 4.853547, 3.182598, 1.575862), epsilon));
-        try expect(approxEqAbs(im[4], f32x4(0.0, -1.575862, -3.182598, -4.853547), epsilon));
-        try expect(approxEqAbs(im[5], f32x4(-6.627417, -8.552178, -10.690858, -13.130861), epsilon));
-        try expect(approxEqAbs(im[6], f32x4(-16.0, -19.496056, -23.945692, -29.933895), epsilon));
-        try expect(approxEqAbs(im[7], f32x4(-38.627417, -52.744931, -80.437432, -162.450726), epsilon));
-    }
-
-    // 64 samples
-    {
-        var re = [_]F32x4{
-            f32x4(1.0, 2.0, 3.0, 4.0),     f32x4(5.0, 6.0, 7.0, 8.0),
-            f32x4(9.0, 10.0, 11.0, 12.0),  f32x4(13.0, 14.0, 15.0, 16.0),
-            f32x4(17.0, 18.0, 19.0, 20.0), f32x4(21.0, 22.0, 23.0, 24.0),
-            f32x4(25.0, 26.0, 27.0, 28.0), f32x4(29.0, 30.0, 31.0, 32.0),
-            f32x4(1.0, 2.0, 3.0, 4.0),     f32x4(5.0, 6.0, 7.0, 8.0),
-            f32x4(9.0, 10.0, 11.0, 12.0),  f32x4(13.0, 14.0, 15.0, 16.0),
-            f32x4(17.0, 18.0, 19.0, 20.0), f32x4(21.0, 22.0, 23.0, 24.0),
-            f32x4(25.0, 26.0, 27.0, 28.0), f32x4(29.0, 30.0, 31.0, 32.0),
-        };
-        var im = [_]F32x4{
-            f32x4s(0.0), f32x4s(0.0), f32x4s(0.0), f32x4s(0.0),
-            f32x4s(0.0), f32x4s(0.0), f32x4s(0.0), f32x4s(0.0),
-            f32x4s(0.0), f32x4s(0.0), f32x4s(0.0), f32x4s(0.0),
-            f32x4s(0.0), f32x4s(0.0), f32x4s(0.0), f32x4s(0.0),
-        };
-
-        fftInitUnityTable(unity_table[0..64]);
-        fft(re[0..], im[0..], unity_table[0..64]);
-
-        try expect(approxEqAbs(re[0], f32x4(1056.0, 0.0, -32.0, 0.0), epsilon));
-        var i: u32 = 1;
-        while (i < 16) : (i += 1) {
-            try expect(approxEqAbs(re[i], f32x4(-32.0, 0.0, -32.0, 0.0), epsilon));
-        }
-
-        const expected = [_]f32{
-            0.0,        0.0,      324.901452,  0.000000, 160.874864,  0.0,      105.489863,  0.000000,
-            77.254834,  0.0,      59.867789,   0.0,      47.891384,   0.0,      38.992113,   0.0,
-            32.000000,  0.000000, 26.261721,   0.000000, 21.381716,   0.000000, 17.104356,   0.000000,
-            13.254834,  0.000000, 9.707094,    0.000000, 6.365196,    0.000000, 3.151725,    0.000000,
-            0.000000,   0.000000, -3.151725,   0.000000, -6.365196,   0.000000, -9.707094,   0.000000,
-            -13.254834, 0.000000, -17.104356,  0.000000, -21.381716,  0.000000, -26.261721,  0.000000,
-            -32.000000, 0.000000, -38.992113,  0.000000, -47.891384,  0.000000, -59.867789,  0.000000,
-            -77.254834, 0.000000, -105.489863, 0.000000, -160.874864, 0.000000, -324.901452, 0.000000,
-        };
-        for (expected, 0..) |e, ie| {
-            try expect(std.math.approxEqAbs(f32, e, im[(ie / 4)][ie % 4], epsilon));
-        }
-    }
-
-    // 128 samples
-    {
-        var re = [_]F32x4{
-            f32x4(1.0, 2.0, 3.0, 4.0),     f32x4(5.0, 6.0, 7.0, 8.0),
-            f32x4(9.0, 10.0, 11.0, 12.0),  f32x4(13.0, 14.0, 15.0, 16.0),
-            f32x4(17.0, 18.0, 19.0, 20.0), f32x4(21.0, 22.0, 23.0, 24.0),
-            f32x4(25.0, 26.0, 27.0, 28.0), f32x4(29.0, 30.0, 31.0, 32.0),
-            f32x4(1.0, 2.0, 3.0, 4.0),     f32x4(5.0, 6.0, 7.0, 8.0),
-            f32x4(9.0, 10.0, 11.0, 12.0),  f32x4(13.0, 14.0, 15.0, 16.0),
-            f32x4(17.0, 18.0, 19.0, 20.0), f32x4(21.0, 22.0, 23.0, 24.0),
-            f32x4(25.0, 26.0, 27.0, 28.0), f32x4(29.0, 30.0, 31.0, 32.0),
-            f32x4(1.0, 2.0, 3.0, 4.0),     f32x4(5.0, 6.0, 7.0, 8.0),
-            f32x4(9.0, 10.0, 11.0, 12.0),  f32x4(13.0, 14.0, 15.0, 16.0),
-            f32x4(17.0, 18.0, 19.0, 20.0), f32x4(21.0, 22.0, 23.0, 24.0),
-            f32x4(25.0, 26.0, 27.0, 28.0), f32x4(29.0, 30.0, 31.0, 32.0),
-            f32x4(1.0, 2.0, 3.0, 4.0),     f32x4(5.0, 6.0, 7.0, 8.0),
-            f32x4(9.0, 10.0, 11.0, 12.0),  f32x4(13.0, 14.0, 15.0, 16.0),
-            f32x4(17.0, 18.0, 19.0, 20.0), f32x4(21.0, 22.0, 23.0, 24.0),
-            f32x4(25.0, 26.0, 27.0, 28.0), f32x4(29.0, 30.0, 31.0, 32.0),
-        };
-        var im = [_]F32x4{
-            f32x4s(0.0), f32x4s(0.0), f32x4s(0.0), f32x4s(0.0),
-            f32x4s(0.0), f32x4s(0.0), f32x4s(0.0), f32x4s(0.0),
-            f32x4s(0.0), f32x4s(0.0), f32x4s(0.0), f32x4s(0.0),
-            f32x4s(0.0), f32x4s(0.0), f32x4s(0.0), f32x4s(0.0),
-            f32x4s(0.0), f32x4s(0.0), f32x4s(0.0), f32x4s(0.0),
-            f32x4s(0.0), f32x4s(0.0), f32x4s(0.0), f32x4s(0.0),
-            f32x4s(0.0), f32x4s(0.0), f32x4s(0.0), f32x4s(0.0),
-            f32x4s(0.0), f32x4s(0.0), f32x4s(0.0), f32x4s(0.0),
-        };
-
-        fftInitUnityTable(unity_table[0..128]);
-        fft(re[0..], im[0..], unity_table[0..128]);
-
-        try expect(approxEqAbs(re[0], f32x4(2112.0, 0.0, 0.0, 0.0), epsilon));
-        var i: u32 = 1;
-        while (i < 32) : (i += 1) {
-            try expect(approxEqAbs(re[i], f32x4(-64.0, 0.0, 0.0, 0.0), epsilon));
-        }
-
-        const expected = [_]f32{
-            0.000000,    0.000000, 0.000000, 0.000000, 649.802905,  0.000000, 0.000000, 0.000000,
-            321.749727,  0.000000, 0.000000, 0.000000, 210.979725,  0.000000, 0.000000, 0.000000,
-            154.509668,  0.000000, 0.000000, 0.000000, 119.735578,  0.000000, 0.000000, 0.000000,
-            95.782769,   0.000000, 0.000000, 0.000000, 77.984226,   0.000000, 0.000000, 0.000000,
-            64.000000,   0.000000, 0.000000, 0.000000, 52.523443,   0.000000, 0.000000, 0.000000,
-            42.763433,   0.000000, 0.000000, 0.000000, 34.208713,   0.000000, 0.000000, 0.000000,
-            26.509668,   0.000000, 0.000000, 0.000000, 19.414188,   0.000000, 0.000000, 0.000000,
-            12.730392,   0.000000, 0.000000, 0.000000, 6.303450,    0.000000, 0.000000, 0.000000,
-            0.000000,    0.000000, 0.000000, 0.000000, -6.303450,   0.000000, 0.000000, 0.000000,
-            -12.730392,  0.000000, 0.000000, 0.000000, -19.414188,  0.000000, 0.000000, 0.000000,
-            -26.509668,  0.000000, 0.000000, 0.000000, -34.208713,  0.000000, 0.000000, 0.000000,
-            -42.763433,  0.000000, 0.000000, 0.000000, -52.523443,  0.000000, 0.000000, 0.000000,
-            -64.000000,  0.000000, 0.000000, 0.000000, -77.984226,  0.000000, 0.000000, 0.000000,
-            -95.782769,  0.000000, 0.000000, 0.000000, -119.735578, 0.000000, 0.000000, 0.000000,
-            -154.509668, 0.000000, 0.000000, 0.000000, -210.979725, 0.000000, 0.000000, 0.000000,
-            -321.749727, 0.000000, 0.000000, 0.000000, -649.802905, 0.000000, 0.000000, 0.000000,
-        };
-        for (expected, 0..) |e, ie| {
-            try expect(std.math.approxEqAbs(f32, e, im[(ie / 4)][ie % 4], epsilon));
-        }
-    }
-}
-
-fn fftUnswizzle(input: []const F32x4, output: []F32x4) void {
-    assert(std.math.isPowerOfTwo(input.len));
-    assert(input.len == output.len);
-    assert(input.ptr != output.ptr);
-
-    const log2_length = std.math.log2_int(usize, input.len * 4);
-    assert(log2_length >= 2);
-
-    const length = input.len;
-
-    const f32_output = @ptrCast([*]f32, output.ptr)[0 .. output.len * 4];
-
-    const static = struct {
-        const swizzle_table = [256]u8{
-            0x00, 0x40, 0x80, 0xC0, 0x10, 0x50, 0x90, 0xD0, 0x20, 0x60, 0xA0, 0xE0, 0x30, 0x70, 0xB0, 0xF0,
-            0x04, 0x44, 0x84, 0xC4, 0x14, 0x54, 0x94, 0xD4, 0x24, 0x64, 0xA4, 0xE4, 0x34, 0x74, 0xB4, 0xF4,
-            0x08, 0x48, 0x88, 0xC8, 0x18, 0x58, 0x98, 0xD8, 0x28, 0x68, 0xA8, 0xE8, 0x38, 0x78, 0xB8, 0xF8,
-            0x0C, 0x4C, 0x8C, 0xCC, 0x1C, 0x5C, 0x9C, 0xDC, 0x2C, 0x6C, 0xAC, 0xEC, 0x3C, 0x7C, 0xBC, 0xFC,
-            0x01, 0x41, 0x81, 0xC1, 0x11, 0x51, 0x91, 0xD1, 0x21, 0x61, 0xA1, 0xE1, 0x31, 0x71, 0xB1, 0xF1,
-            0x05, 0x45, 0x85, 0xC5, 0x15, 0x55, 0x95, 0xD5, 0x25, 0x65, 0xA5, 0xE5, 0x35, 0x75, 0xB5, 0xF5,
-            0x09, 0x49, 0x89, 0xC9, 0x19, 0x59, 0x99, 0xD9, 0x29, 0x69, 0xA9, 0xE9, 0x39, 0x79, 0xB9, 0xF9,
-            0x0D, 0x4D, 0x8D, 0xCD, 0x1D, 0x5D, 0x9D, 0xDD, 0x2D, 0x6D, 0xAD, 0xED, 0x3D, 0x7D, 0xBD, 0xFD,
-            0x02, 0x42, 0x82, 0xC2, 0x12, 0x52, 0x92, 0xD2, 0x22, 0x62, 0xA2, 0xE2, 0x32, 0x72, 0xB2, 0xF2,
-            0x06, 0x46, 0x86, 0xC6, 0x16, 0x56, 0x96, 0xD6, 0x26, 0x66, 0xA6, 0xE6, 0x36, 0x76, 0xB6, 0xF6,
-            0x0A, 0x4A, 0x8A, 0xCA, 0x1A, 0x5A, 0x9A, 0xDA, 0x2A, 0x6A, 0xAA, 0xEA, 0x3A, 0x7A, 0xBA, 0xFA,
-            0x0E, 0x4E, 0x8E, 0xCE, 0x1E, 0x5E, 0x9E, 0xDE, 0x2E, 0x6E, 0xAE, 0xEE, 0x3E, 0x7E, 0xBE, 0xFE,
-            0x03, 0x43, 0x83, 0xC3, 0x13, 0x53, 0x93, 0xD3, 0x23, 0x63, 0xA3, 0xE3, 0x33, 0x73, 0xB3, 0xF3,
-            0x07, 0x47, 0x87, 0xC7, 0x17, 0x57, 0x97, 0xD7, 0x27, 0x67, 0xA7, 0xE7, 0x37, 0x77, 0xB7, 0xF7,
-            0x0B, 0x4B, 0x8B, 0xCB, 0x1B, 0x5B, 0x9B, 0xDB, 0x2B, 0x6B, 0xAB, 0xEB, 0x3B, 0x7B, 0xBB, 0xFB,
-            0x0F, 0x4F, 0x8F, 0xCF, 0x1F, 0x5F, 0x9F, 0xDF, 0x2F, 0x6F, 0xAF, 0xEF, 0x3F, 0x7F, 0xBF, 0xFF,
-        };
-    };
-
-    if ((log2_length & 1) == 0) {
-        const rev32 = @intCast(u6, 32 - log2_length);
-        var index: usize = 0;
-        while (index < length) : (index += 1) {
-            const n = index * 4;
-            const addr =
-                (@intCast(usize, static.swizzle_table[n & 0xff]) << 24) |
-                (@intCast(usize, static.swizzle_table[(n >> 8) & 0xff]) << 16) |
-                (@intCast(usize, static.swizzle_table[(n >> 16) & 0xff]) << 8) |
-                @intCast(usize, static.swizzle_table[(n >> 24) & 0xff]);
-            f32_output[addr >> rev32] = input[index][0];
-            f32_output[(0x40000000 | addr) >> rev32] = input[index][1];
-            f32_output[(0x80000000 | addr) >> rev32] = input[index][2];
-            f32_output[(0xC0000000 | addr) >> rev32] = input[index][3];
-        }
-    } else {
-        const rev7 = @as(usize, 1) << @intCast(u6, log2_length - 3);
-        const rev32 = @intCast(u6, 32 - (log2_length - 3));
-        var index: usize = 0;
-        while (index < length) : (index += 1) {
-            const n = index / 2;
-            var addr =
-                (((@intCast(usize, static.swizzle_table[n & 0xff]) << 24) |
-                (@intCast(usize, static.swizzle_table[(n >> 8) & 0xff]) << 16) |
-                (@intCast(usize, static.swizzle_table[(n >> 16) & 0xff]) << 8) |
-                (@intCast(usize, static.swizzle_table[(n >> 24) & 0xff]))) >> rev32) |
-                ((index & 1) * rev7 * 4);
-            f32_output[addr] = input[index][0];
-            addr += rev7;
-            f32_output[addr] = input[index][1];
-            addr += rev7;
-            f32_output[addr] = input[index][2];
-            addr += rev7;
-            f32_output[addr] = input[index][3];
-        }
-    }
-}
-
-pub fn fftInitUnityTable(out_unity_table: []F32x4) void {
-    assert(std.math.isPowerOfTwo(out_unity_table.len));
-    assert(out_unity_table.len >= 32 and out_unity_table.len <= 512);
-
-    var unity_table = out_unity_table;
-
-    const v0123 = f32x4(0.0, 1.0, 2.0, 3.0);
-    var length = out_unity_table.len / 4;
-    var vlstep = f32x4s(0.5 * math.pi / @intToFloat(f32, length));
-
-    while (true) {
-        length /= 4;
-        var vjp = v0123;
-
-        var j: u32 = 0;
-        while (j < length) : (j += 1) {
-            unity_table[j] = f32x4s(1.0);
-            unity_table[j + length * 4] = f32x4s(0.0);
-
-            var vls = vjp * vlstep;
-            var sin_cos = sincos(vls);
-            unity_table[j + length] = sin_cos[1];
-            unity_table[j + length * 5] = sin_cos[0] * f32x4s(-1.0);
-
-            var vijp = vjp + vjp;
-            vls = vijp * vlstep;
-            sin_cos = sincos(vls);
-            unity_table[j + length * 2] = sin_cos[1];
-            unity_table[j + length * 6] = sin_cos[0] * f32x4s(-1.0);
-
-            vijp = vijp + vjp;
-            vls = vijp * vlstep;
-            sin_cos = sincos(vls);
-            unity_table[j + length * 3] = sin_cos[1];
-            unity_table[j + length * 7] = sin_cos[0] * f32x4s(-1.0);
-
-            vjp += f32x4s(4.0);
-        }
-        vlstep *= f32x4s(4.0);
-        unity_table = unity_table[8 * length ..];
-
-        if (length <= 4)
-            break;
-    }
-}
-
-pub fn fft(re: []F32x4, im: []F32x4, unity_table: []const F32x4) void {
-    const length = @intCast(u32, re.len * 4);
-    assert(std.math.isPowerOfTwo(length));
-    assert(length >= 4 and length <= 512);
-    assert(re.len == im.len);
-
-    var re_temp_storage: [128]F32x4 = undefined;
-    var im_temp_storage: [128]F32x4 = undefined;
-    var re_temp = re_temp_storage[0..re.len];
-    var im_temp = im_temp_storage[0..im.len];
-
-    std.mem.copy(F32x4, re_temp, re);
-    std.mem.copy(F32x4, im_temp, im);
-
-    if (length > 16) {
-        assert(unity_table.len == length);
-        fftN(re_temp, im_temp, unity_table, length, 1);
-    } else if (length == 16) {
-        fft16(re_temp, im_temp, 1);
-    } else if (length == 8) {
-        fft8(re_temp, im_temp, 1);
-    } else if (length == 4) {
-        fft4(re_temp, im_temp, 1);
-    }
-
-    fftUnswizzle(re_temp, re);
-    fftUnswizzle(im_temp, im);
-}
-
-pub fn ifft(re: []F32x4, im: []const F32x4, unity_table: []const F32x4) void {
-    const length = @intCast(u32, re.len * 4);
-    assert(std.math.isPowerOfTwo(length));
-    assert(length >= 4 and length <= 512);
-    assert(re.len == im.len);
-
-    var re_temp_storage: [128]F32x4 = undefined;
-    var im_temp_storage: [128]F32x4 = undefined;
-    var re_temp = re_temp_storage[0..re.len];
-    var im_temp = im_temp_storage[0..im.len];
-
-    const rnp = f32x4s(1.0 / @intToFloat(f32, length));
-    const rnm = f32x4s(-1.0 / @intToFloat(f32, length));
-
-    for (re, 0..) |_, i| {
-        re_temp[i] = re[i] * rnp;
-        im_temp[i] = im[i] * rnm;
-    }
-
-    if (length > 16) {
-        assert(unity_table.len == length);
-        fftN(re_temp, im_temp, unity_table, length, 1);
-    } else if (length == 16) {
-        fft16(re_temp, im_temp, 1);
-    } else if (length == 8) {
-        fft8(re_temp, im_temp, 1);
-    } else if (length == 4) {
-        fft4(re_temp, im_temp, 1);
-    }
-
-    fftUnswizzle(re_temp, re);
-}
-test "zmath.ifft" {
-    var unity_table: [512]F32x4 = undefined;
-    const epsilon = 0.0001;
-
-    // 64 samples
-    {
-        var re = [_]F32x4{
-            f32x4(1.0, 2.0, 3.0, 4.0),     f32x4(5.0, 6.0, 7.0, 8.0),
-            f32x4(9.0, 10.0, 11.0, 12.0),  f32x4(13.0, 14.0, 15.0, 16.0),
-            f32x4(17.0, 18.0, 19.0, 20.0), f32x4(21.0, 22.0, 23.0, 24.0),
-            f32x4(25.0, 26.0, 27.0, 28.0), f32x4(29.0, 30.0, 31.0, 32.0),
-            f32x4(1.0, 2.0, 3.0, 4.0),     f32x4(5.0, 6.0, 7.0, 8.0),
-            f32x4(9.0, 10.0, 11.0, 12.0),  f32x4(13.0, 14.0, 15.0, 16.0),
-            f32x4(17.0, 18.0, 19.0, 20.0), f32x4(21.0, 22.0, 23.0, 24.0),
-            f32x4(25.0, 26.0, 27.0, 28.0), f32x4(29.0, 30.0, 31.0, 32.0),
-        };
-        var im = [_]F32x4{
-            f32x4s(0.0), f32x4s(0.0), f32x4s(0.0), f32x4s(0.0),
-            f32x4s(0.0), f32x4s(0.0), f32x4s(0.0), f32x4s(0.0),
-            f32x4s(0.0), f32x4s(0.0), f32x4s(0.0), f32x4s(0.0),
-            f32x4s(0.0), f32x4s(0.0), f32x4s(0.0), f32x4s(0.0),
-        };
-
-        fftInitUnityTable(unity_table[0..64]);
-        fft(re[0..], im[0..], unity_table[0..64]);
-
-        try expect(approxEqAbs(re[0], f32x4(1056.0, 0.0, -32.0, 0.0), epsilon));
-        var i: u32 = 1;
-        while (i < 16) : (i += 1) {
-            try expect(approxEqAbs(re[i], f32x4(-32.0, 0.0, -32.0, 0.0), epsilon));
-        }
-
-        ifft(re[0..], im[0..], unity_table[0..64]);
-
-        try expect(approxEqAbs(re[0], f32x4(1.0, 2.0, 3.0, 4.0), epsilon));
-        try expect(approxEqAbs(re[1], f32x4(5.0, 6.0, 7.0, 8.0), epsilon));
-        try expect(approxEqAbs(re[2], f32x4(9.0, 10.0, 11.0, 12.0), epsilon));
-        try expect(approxEqAbs(re[3], f32x4(13.0, 14.0, 15.0, 16.0), epsilon));
-        try expect(approxEqAbs(re[4], f32x4(17.0, 18.0, 19.0, 20.0), epsilon));
-        try expect(approxEqAbs(re[5], f32x4(21.0, 22.0, 23.0, 24.0), epsilon));
-        try expect(approxEqAbs(re[6], f32x4(25.0, 26.0, 27.0, 28.0), epsilon));
-        try expect(approxEqAbs(re[7], f32x4(29.0, 30.0, 31.0, 32.0), epsilon));
-    }
-
-    // 512 samples
-    {
-        var re: [128]F32x4 = undefined;
-        var im = [_]F32x4{f32x4s(0.0)} ** 128;
-
-        for (&re, 0..) |*v, i| {
-            const f = @intToFloat(f32, i * 4);
-            v.* = f32x4(f + 1.0, f + 2.0, f + 3.0, f + 4.0);
-        }
-
-        fftInitUnityTable(unity_table[0..512]);
-        fft(re[0..], im[0..], unity_table[0..512]);
-
-        for (re, 0..) |v, i| {
-            const f = @intToFloat(f32, i * 4);
-            try expect(!approxEqAbs(v, f32x4(f + 1.0, f + 2.0, f + 3.0, f + 4.0), epsilon));
-        }
-
-        ifft(re[0..], im[0..], unity_table[0..512]);
-
-        for (re, 0..) |v, i| {
-            const f = @intToFloat(f32, i * 4);
-            try expect(approxEqAbs(v, f32x4(f + 1.0, f + 2.0, f + 3.0, f + 4.0), epsilon));
-        }
-    }
-}
-// ------------------------------------------------------------------------------
-//
-// Private functions and constants
-//
-// ------------------------------------------------------------------------------
-const f32x4_sign_mask1: F32x4 = F32x4{ @bitCast(f32, @as(u32, 0x8000_0000)), 0, 0, 0 };
-const f32x4_mask2: F32x4 = F32x4{
-    @bitCast(f32, @as(u32, 0xffff_ffff)),
-    @bitCast(f32, @as(u32, 0xffff_ffff)),
-    0,
-    0,
-};
-const f32x4_mask3: F32x4 = F32x4{
-    @bitCast(f32, @as(u32, 0xffff_ffff)),
-    @bitCast(f32, @as(u32, 0xffff_ffff)),
-    @bitCast(f32, @as(u32, 0xffff_ffff)),
-    0,
-};
-
-inline fn splatNegativeZero(comptime T: type) T {
-    return @splat(veclen(T), @bitCast(f32, @as(u32, 0x8000_0000)));
-}
-inline fn splatNoFraction(comptime T: type) T {
-    return @splat(veclen(T), @as(f32, 8_388_608.0));
-}
-inline fn splatAbsMask(comptime T: type) T {
-    return @splat(veclen(T), @bitCast(f32, @as(u32, 0x7fff_ffff)));
-}
-
-fn floatToIntAndBack(v: anytype) @TypeOf(v) {
-    // This routine won't handle nan, inf and numbers greater than 8_388_608.0 (will generate undefined values).
-    @setRuntimeSafety(false);
-
-    const T = @TypeOf(v);
-    const len = veclen(T);
-
-    var vi32: [len]i32 = undefined;
-    comptime var i: u32 = 0;
-    // vcvttps2dq
-    inline while (i < len) : (i += 1) {
-        vi32[i] = @floatToInt(i32, v[i]);
-    }
-
-    var vf32: [len]f32 = undefined;
-    i = 0;
-    // vcvtdq2ps
-    inline while (i < len) : (i += 1) {
-        vf32[i] = @intToFloat(f32, vi32[i]);
-    }
-
-    return vf32;
-}
-test "zmath.floatToIntAndBack" {
-    {
-        const v = floatToIntAndBack(f32x4(1.1, 2.9, 3.0, -4.5));
-        try expect(approxEqAbs(v, f32x4(1.0, 2.0, 3.0, -4.0), 0.0));
-    }
-    {
-        const v = floatToIntAndBack(f32x8(1.1, 2.9, 3.0, -4.5, 2.5, -2.5, 1.1, -100.2));
-        try expect(approxEqAbs(v, f32x8(1.0, 2.0, 3.0, -4.0, 2.0, -2.0, 1.0, -100.0), 0.0));
-    }
-    {
-        const v = floatToIntAndBack(f32x4(math.inf_f32, 2.9, math.nan_f32, math.qnan_f32));
-        try expect(v[1] == 2.0);
-    }
-}
-
-pub fn approxEqAbs(v0: anytype, v1: anytype, eps: f32) bool {
-    const T = @TypeOf(v0, v1);
-    comptime var i: comptime_int = 0;
-    inline while (i < veclen(T)) : (i += 1) {
-        if (!math.approxEqAbs(f32, v0[i], v1[i], eps)) {
-            return false;
-        }
-    }
-    return true;
-}
-
-// ------------------------------------------------------------------------------
-// This software is available under 2 licenses -- choose whichever you prefer.
-// ------------------------------------------------------------------------------
-// ALTERNATIVE A - MIT License
-// Copyright (c) 2022 Michal Ziulek and Contributors
-// Permission is hereby granted, free of charge, to any person obtaining a copy of
-// this software and associated documentation files (the "Software"), to deal in
-// the Software without restriction, including without limitation the rights to
-// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
-// of the Software, and to permit persons to whom the Software is furnished to do
-// so, subject to the following conditions:
-// The above copyright notice and this permission notice shall be included in all
-// copies or substantial portions of the Software.
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-// SOFTWARE.
-// ------------------------------------------------------------------------------
-// ALTERNATIVE B - Public Domain (www.unlicense.org)
-// This is free and unencumbered software released into the public domain.
-// Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
-// software, either in source code form or as a compiled binary, for any purpose,
-// commercial or non-commercial, and by any means.
-// In jurisdictions that recognize copyright laws, the author or authors of this
-// software dedicate any and all copyright interest in the software to the public
-// domain. We make this dedication for the benefit of the public at large and to
-// the detriment of our heirs and successors. We intend this dedication to be an
-// overt act of relinquishment in perpetuity of all present and future rights to
-// this software under copyright law.
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-// AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-// ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
-// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-// ------------------------------------------------------------------------------
diff --git a/src/SomaSolve.cpp b/src/SomaSolve.cpp
index 6f1007e..77afa30 100644
--- a/src/SomaSolve.cpp
+++ b/src/SomaSolve.cpp
@@ -1,15 +1,11 @@
-#include <bitset>
-#include <span>
-#include <cstdint>
 #include <iostream>
 #include <string>
-#include <algorithm>
 #include <vector>
 #include "VoxelSpace.h"
 
-auto get_dims_input(int dims[3]) -> void {
+void get_dims_input(int dims[3]) {
     std::cout << "Enter dimensions separated by newlines. (x*y*z must not exceed 64)\n";
-    auto success = false;
+    bool success = false;
     while (!success) {
         std::cout << "x: ";
         std::cin >> dims[0];
@@ -18,7 +14,7 @@ auto get_dims_input(int dims[3]) -> void {
         std::cout << "z: ";
         std::cin >> dims[2];
 
-        auto size = dims[0]*dims[1]*dims[2];
+        int size = dims[0]*dims[1]*dims[2];
         if (size <= 64) {
             success = true;
         } else {
@@ -28,12 +24,12 @@ auto get_dims_input(int dims[3]) -> void {
     }
 }
 
-auto get_reprs_input(int units_required) -> std::vector<uint64_t> {
+std::vector<uint64> get_reprs_input(int units_required) {
     std::cout << "Enter bit-representations (big endian, max 64 bits, total 1s must add up to " << units_required << "). press ENTER twice to finish input.\n";
-    auto reprs = std::vector<uint64_t>();
-    auto total_units = 0;
+    std::vector<uint64> reprs = std::vector<uint64>();
+    int total_units = 0;
     while (true) {
-        auto input = std::string();
+        std::string input = std::string();
         std::getline(std::cin, input);
         if (input.size() == 0) {
             if (total_units == units_required) {
@@ -44,12 +40,12 @@ auto get_reprs_input(int units_required) -> std::vector<uint64_t> {
                 continue;
             }
         }
-        auto bit_repr = 0ul;
-        auto i = 0;
-        auto good_repr = true;
+        uint64 bit_repr = 0;
+        int i = 0;
+        bool good_repr = true;
         for (auto it = input.rbegin(); it < input.rend(); it++, i++) {
             if (*it == '1') {
-                bit_repr |= 1ul << i;
+                bit_repr |= (uint64)1 << i; // widen before shifting: a plain int 1 cannot reach bit positions 31..63
                 total_units++;
             } else if (*it != '0' || i >= 64) {
                 std::cout << "Input invalid. Enter a binary string only with max 64 bits." << '\n';
@@ -64,191 +60,189 @@ auto get_reprs_input(int units_required) -> std::vector<uint64_t> {
     return reprs;
 }
 
-namespace SomaSolve {
-    using SomaSolution = std::vector<uint64_t>;
+typedef std::vector<uint64> SomaSolution;
 
-    struct Solver {
-        std::vector<uint64_t>* input;
-        std::vector<int>* offsets;
-        std::vector<SomaSolution>* solutions;
-    };
+struct Solver {
+    std::vector<uint64>* input;
+    std::vector<int>* offsets;
+    std::vector<SomaSolution>* solutions;
+};
 
-    auto STD_SOMA = std::vector<uint64_t>{ 23ul, 30ul, 15ul, 1043ul, 24594ul, 12306ul, 11ul };
+std::vector<uint64> STD_SOMA = { 23ul, 30ul, 15ul, 1043ul, 24594ul, 12306ul, 11ul };
 
-    auto backtrack_solve_iter(std::vector<uint64_t> *polycube_input, std::vector<int> *offsets)-> void {
-        auto num_inputs = offsets->size() - 1;
+void backtrack_solve_iter(std::vector<uint64> *polycube_input, std::vector<int> *offsets) {
+    int num_inputs = offsets->size() - 1;
 
-        auto solns = std::vector<int>();
+    std::vector<int> solns = std::vector<int>();
 
-        auto iter_stack = std::vector<int>();
-        auto curr_soln_stack = std::vector<int>();
-        auto soln_spaces_stack = std::vector<uint64_t>();
-        soln_spaces_stack.push_back(0ul);
+    std::vector<int> iter_stack = std::vector<int>();
+    std::vector<int> curr_soln_stack = std::vector<int>();
+    std::vector<uint64> soln_spaces_stack = std::vector<uint64>();
+    soln_spaces_stack.push_back(0ul);
 
-        auto depth = 0;
+    int depth = 0;
 
-        while (depth >= 0) {
-            if (depth >= iter_stack.size()) {
-                iter_stack.push_back(offsets->at(depth));
-            }
-            auto end = offsets->at(depth + 1);
-            auto broke = false;
-            for (; iter_stack[depth] < end; iter_stack[depth]++) {
-                auto next_space = polycube_input->at(iter_stack[depth]);
-                auto soln_space = soln_spaces_stack[depth];
-                std::cout << next_space << " " << soln_space << std::endl;
-                auto successful_fuse = (soln_space | next_space) == (soln_space ^ next_space);
-                if (successful_fuse) {
-                    soln_spaces_stack.push_back(soln_space |= next_space);
-                    curr_soln_stack.push_back(iter_stack[depth]);
-                    depth++;
-                    if (curr_soln_stack.size() == num_inputs) {
-                        solns.push_back(1);
-                        curr_soln_stack.pop_back();
-                        soln_spaces_stack.pop_back();
-                        depth--;
-                    } else {
-                        depth++;
-                        auto broke = true;
-                        break;
-                    }
-                }
-            }
-            if (!broke) {
-                curr_soln_stack.pop_back();
-                soln_spaces_stack.pop_back();
-                depth--;
-            }
+    while (depth >= 0) {
+        if (depth >= iter_stack.size()) {
+            iter_stack.push_back(offsets->at(depth));
         }
-        std::cout << "Done. Found " << solns.size() << " solutions." << std::endl;
-    }
-
-    auto backtrack_solve(Solver *solver, uint64_t working_solution = 0ul, int curr_piece = 0) -> void {
-        auto input = solver->input;
-        auto offsets = solver->offsets;
-        auto solutions = solver->solutions;
-        auto start = offsets->at(curr_piece);
-        auto end = offsets->at(curr_piece + 1);
-        auto num_pieces = offsets->size() - 1;
-        for (int i = start; i < end; i++) {
-            auto successful_fuse = !Voxel::collides(working_solution, input->at(i));
+        int end = offsets->at(depth + 1);
+        bool broke = false;
+        for (; iter_stack[depth] < end; iter_stack[depth]++) {
+            uint64 next_space = polycube_input->at(iter_stack[depth]);
+            uint64 soln_space = soln_spaces_stack[depth];
+            std::cout << next_space << " " << soln_space << std::endl;
+            bool successful_fuse = (soln_space | next_space) == (soln_space ^ next_space);
             if (successful_fuse) {
-                auto new_working_solution = working_solution | input->at(i);
-                solutions->back().at(curr_piece) = input->at(i);
-                if (curr_piece == num_pieces - 1) {
-                    auto last_soln = solutions->back();
-                    solutions->push_back(SomaSolution(last_soln.begin(), last_soln.end()));
-                    return;
+                soln_spaces_stack.push_back(soln_space |= next_space);
+                curr_soln_stack.push_back(iter_stack[depth]);
+                depth++;
+                if (curr_soln_stack.size() == num_inputs) {
+                    solns.push_back(1);
+                    curr_soln_stack.pop_back();
+                    soln_spaces_stack.pop_back();
+                    depth--;
                 } else {
-                    backtrack_solve(solver, new_working_solution, curr_piece + 1);
-                }
-            }
-        }
-        if (curr_piece == 0) {
-            solutions->pop_back();
-        } 
-    }
-
-    auto get_solution_rotations(SomaSolution *solution, int dims[3]) -> std::vector<SomaSolution> {
-        auto result = std::vector<SomaSolution>(Voxel::NUM_ROTS_3D);
-        for (int piece_i = 0; piece_i < solution->size(); piece_i++) {
-            auto space = Voxel::Space{ 
-                .space=solution->at(piece_i),
-                .dim_x=dims[0],
-                .dim_y=dims[1],
-                .dim_z=dims[2],
-            };
-            auto piece_rotations = Voxel::getAllRotations(&space);
-            for (int rot_i = 0; rot_i < piece_rotations.size(); rot_i++) {
-                result[rot_i].push_back(piece_rotations[rot_i].space);
-            }
-        }
-        return result;
-    }
-
-    auto filter_unique(std::vector<SomaSolution> *solutions, int dims[3]) -> std::vector<SomaSolution> {
-        if (solutions->size() == 0) {
-            return std::vector<SomaSolution>();
-        }
-        auto unique_solns = std::vector<SomaSolution>{};
-        for (auto &solution : *solutions) {
-            auto found_match = false;
-            for (auto &rotation : get_solution_rotations(&solution, dims)) { 
-                for (auto &unique_soln : unique_solns) {
-                    auto is_match = true;
-                    for (int piece_i = 0; piece_i < unique_soln.size(); piece_i++) {
-                        if (rotation[piece_i] != unique_soln[piece_i]) {
-                            is_match = false;
-                            break;
-                        }
-                    }
-                    if (is_match) {
-                        found_match = true;
-                        break;
-                    }
-                }
-                if (found_match) {
+                    depth++;
+                    broke = true;
                     break;
                 }
             }
-            if (!found_match) {
-                unique_solns.push_back(SomaSolution(solution));
+        }
+        if (!broke) {
+            curr_soln_stack.pop_back();
+            soln_spaces_stack.pop_back();
+            depth--;
+        }
+    }
+    std::cout << "Done. Found " << solns.size() << " solutions." << std::endl;
+}
+
+void backtrack_solve(Solver *solver, uint64 working_solution = 0, int curr_piece = 0) {
+    std::vector<uint64> *input = solver->input;
+    std::vector<int> *offsets = solver->offsets;
+    std::vector<SomaSolution> *solutions = solver->solutions;
+    int start = offsets->at(curr_piece);
+    int end = offsets->at(curr_piece + 1);
+    size_t num_pieces = offsets->size() - 1;
+    for (int i = start; i < end; i++) {
+        bool successful_fuse = !collides(working_solution, input->at(i));
+        if (successful_fuse) {
+            uint64 new_working_solution = working_solution | input->at(i);
+            solutions->back().at(curr_piece) = input->at(i);
+            if (curr_piece == num_pieces - 1) {
+                std::vector<uint64> last_soln = solutions->back();
+                solutions->push_back(SomaSolution(last_soln.begin(), last_soln.end()));
+                return;
+            } else {
+                backtrack_solve(solver, new_working_solution, curr_piece + 1);
             }
         }
-        return unique_solns;
-    }
-
-    auto solve(std::vector<uint64_t> *reprs_in, int dims[3]) -> std::vector<SomaSolution> {
-        auto reprs = *reprs_in;
-        auto offsets = std::vector<int>();
-        auto polycubes = std::vector<uint64_t>();
-        polycubes.reserve(reprs.size() * 10);
-
-        auto model_space = Voxel::Space{ 
-            .space={},
-            .dim_x=dims[0],
-            .dim_y=dims[1],
-            .dim_z=dims[2],
-        };
-
-        offsets.push_back(0);
-        auto space = model_space;
-        space.space = reprs[0];
-        Voxel::cullEmptySpace(&space);
-        auto positions = Voxel::getAllPositionsInPrism(&space, dims);
-        polycubes.insert(polycubes.end(), positions.begin(), positions.end());
-
-        for (int i = 1; i < reprs.size(); i++) {
-            offsets.push_back(polycubes.size());
-            auto space = model_space;
-            space.space = reprs[i];
-            Voxel::cullEmptySpace(&space);
-            auto perms = Voxel::getAllPermutationsInPrism(&space, dims);
-            polycubes.insert(polycubes.end(), perms.begin(), perms.end());
-        }
-
-        offsets.push_back(polycubes.size());
-
-        auto solutions = std::vector<SomaSolution>{std::vector<uint64_t>(reprs.size())};
-        auto solver = Solver{
-            .input=&polycubes,
-            .offsets=&offsets,
-            .solutions=&solutions,
-        };
-
-        backtrack_solve(&solver);
-
-        return filter_unique(solver.solutions, dims);
-    }
-
-
-    auto interactive_cmd_line_solve_soma() -> void {
-        int dims[3] = { 3, 3, 3 };
-        //get_dims_input(dims);
-        //std::cout << '\n';
-        //auto reprs = get_reprs_input(dims[0]*dims[1]*dims[2]);
-        std::cout << "Great. Calculating solutions...\n";
-        auto solutions = SomaSolve::solve(&SomaSolve::STD_SOMA, std::array<int, 3>{ 3, 3, 3 }.data());
-        std::cout << solutions.size() << " solutions found." << std::endl;
     }
+    if (curr_piece == 0) {
+        solutions->pop_back();
+    } 
+}
+
+std::vector<SomaSolution> get_solution_rotations(SomaSolution *solution, int dims[3]) {
+    std::vector<SomaSolution> result = std::vector<SomaSolution>(NUM_ROTS_3D);
+    for (int piece_i = 0; piece_i < solution->size(); piece_i++) {
+        Space space = { 
+            solution->at(piece_i),
+            dims[0],
+            dims[1],
+            dims[2],
+        };
+        std::vector<Space> piece_rotations = getAllRotations(&space);
+        for (int rot_i = 0; rot_i < piece_rotations.size(); rot_i++) {
+            result[rot_i].push_back(piece_rotations[rot_i].space);
+        }
+    }
+    return result;
+}
+
+std::vector<SomaSolution> filter_unique(std::vector<SomaSolution> *solutions, int dims[3]) {
+    if (solutions->size() == 0) {
+        return std::vector<SomaSolution>();
+    }
+    std::vector<SomaSolution> unique_solns = std::vector<SomaSolution>{};
+    for (std::vector<uint64> &solution : *solutions) {
+        bool found_match = false;
+        for (SomaSolution &rotation : get_solution_rotations(&solution, dims)) { 
+            for (auto &unique_soln : unique_solns) {
+                bool is_match = true;
+                for (int piece_i = 0; piece_i < unique_soln.size(); piece_i++) {
+                    if (rotation[piece_i] != unique_soln[piece_i]) {
+                        is_match = false;
+                        break;
+                    }
+                }
+                if (is_match) {
+                    found_match = true;
+                    break;
+                }
+            }
+            if (found_match) {
+                break;
+            }
+        }
+        if (!found_match) {
+            unique_solns.push_back(SomaSolution(solution));
+        }
+    }
+    return unique_solns;
+}
+
+std::vector<SomaSolution> solve(std::vector<uint64> *reprs_in, int dims[3]) {
+    std::vector<uint64> reprs = *reprs_in;
+    std::vector<int> offsets = std::vector<int>();
+    std::vector<uint64> polycubes = std::vector<uint64>();
+    polycubes.reserve(reprs.size() * 10);
+
+    Space model_space = { 
+        {},
+        dims[0],
+        dims[1],
+        dims[2],
+    };
+
+    offsets.push_back(0);
+    Space space = model_space;
+    space.space = reprs[0];
+    cullEmptySpace(&space);
+    std::vector<uint64> positions = getAllPositionsInPrism(&space, dims);
+    polycubes.insert(polycubes.end(), positions.begin(), positions.end());
+
+    for (int i = 1; i < reprs.size(); i++) {
+        offsets.push_back(polycubes.size());
+        Space space = model_space;
+        space.space = reprs[i];
+        cullEmptySpace(&space);
+        std::vector<uint64> perms = getAllPermutationsInPrism(&space, dims);
+        polycubes.insert(polycubes.end(), perms.begin(), perms.end());
+    }
+
+    offsets.push_back(polycubes.size());
+
+    std::vector<SomaSolution> solutions = {std::vector<uint64>(reprs.size())};
+    Solver solver = {
+        &polycubes,
+        &offsets,
+        &solutions,
+    };
+
+    backtrack_solve(&solver);
+
+    return filter_unique(solver.solutions, dims);
+}
+
+
+void interactive_cmd_line_solve_soma() {
+    int dims[3] = { 3, 3, 3 };
+    //get_dims_input(dims);
+    //std::cout << '\n';
+    //auto reprs = get_reprs_input(dims[0]*dims[1]*dims[2]);
+    std::cout << "Great. Calculating solutions...\n";
+    std::vector<SomaSolution> solutions = solve(&STD_SOMA, dims);
+    std::cout << solutions.size() << " solutions found." << std::endl;
 }
diff --git a/src/SomaSolve.h b/src/SomaSolve.h
index bdcbee0..3f69d34 100644
--- a/src/SomaSolve.h
+++ b/src/SomaSolve.h
@@ -1,10 +1,7 @@
 #include <cstdint>
 #include <vector>
 
-namespace SomaSolve {
-    extern std::vector<uint64_t> STD_SOMA; 
-    using SomaSolution = std::vector<uint64_t>;
-    auto solve(std::vector<uint64_t> *reprs_in, int dims[3]) -> std::vector<SomaSolution>; 
-    auto interactive_cmd_line_solve_soma() -> void; 
-}
-
+extern std::vector<uint64_t> STD_SOMA; 
+typedef std::vector<uint64_t> SomaSolution;
+std::vector<SomaSolution> solve(std::vector<uint64_t> *reprs_in, int dims[3]); 
+void interactive_cmd_line_solve_soma(); 
diff --git a/src/VoxelSpace.cpp b/src/VoxelSpace.cpp
index d2f5aa7..30a27a8 100644
--- a/src/VoxelSpace.cpp
+++ b/src/VoxelSpace.cpp
@@ -5,284 +5,282 @@
 #include <cstdint>
 #include "VoxelSpace.h"
 
-namespace Voxel {
-    auto index(int dim_y, int dim_z, int x, int y, int z) -> int {
-        return dim_y * dim_z * x + dim_z * y + z;
-    }
-    
-    // ┌          ┐   ┌   ┐   ┌   ┐ 
-    // │ 1, 0,  0 │   │ x │   │ x │
-    // │ 0, 0, -1 │ * │ y │ = │-z │
-    // │ 0, 1,  0 │   │ z │   │ y │
-    // └          ┘   └   ┘   └   ┘
-    auto newIndexRotX(Space *space, int x, int y, int z) -> int {
-        return space->dim_z * space->dim_y * x + space->dim_y * (space->dim_z - 1 - z) + y;
-    }
+int index(int dim_y, int dim_z, int x, int y, int z) {
+    return dim_y * dim_z * x + dim_z * y + z;
+}
 
-    // ┌          ┐   ┌   ┐   ┌   ┐ 
-    // │  0, 0, 1 │   │ x │   │ z │
-    // │  0, 1, 0 │ * │ y │ = │-y │
-    // │ -1, 0, 0 │   │ z │   │ x │
-    // └          ┘   └   ┘   └   ┘
-    auto newIndexRotY(Space *space, int x, int y, int z) -> int {
-        return space->dim_y * space->dim_x * z + space->dim_x * y + (space->dim_x - 1 - x);
+// ┌          ┐   ┌   ┐   ┌   ┐ 
+// │ 1, 0,  0 │   │ x │   │ x │
+// │ 0, 0, -1 │ * │ y │ = │-z │
+// │ 0, 1,  0 │   │ z │   │ y │
+// └          ┘   └   ┘   └   ┘
+int newIndexRotX(Space *space, int x, int y, int z) {
+    return space->dim_z * space->dim_y * x + space->dim_y * (space->dim_z - 1 - z) + y;
+}
+
+// ┌          ┐   ┌   ┐   ┌   ┐ 
+// │  0, 0, 1 │   │ x │   │ z │
+// │  0, 1, 0 │ * │ y │ = │-y │
+// │ -1, 0, 0 │   │ z │   │ x │
+// └          ┘   └   ┘   └   ┘
+int newIndexRotY(Space *space, int x, int y, int z) {
+    return space->dim_y * space->dim_x * z + space->dim_x * y + (space->dim_x - 1 - x);
+}
+
+// ┌          ┐   ┌   ┐   ┌   ┐ 
+// │ 0, -1, 0 │   │ x │   │-y │
+// │ 1,  0, 0 │ * │ y │ = │ x │
+// │ 0,  0, 1 │   │ z │   │ z │
+// └          ┘   └   ┘   └   ┘
+int newIndexRotZ(Space *space, int x, int y, int z) {
+    return space->dim_x * space->dim_z * (space->dim_y - 1 - y) + space->dim_z * x + z;
+}
+
+uint64 toggle(uint64 space, int index) { // returns `space` with bit `index` flipped
+    space ^= 1ull << index; // 1ull: unsigned long is not guaranteed to be 64 bits wide
+    return space;
+}
+
+uint64 set(uint64 space, int index, bool val) { // returns `space` with bit `index` forced to `val`
+    if (val) {
+        space |= 1ull << index; // 1ull: unsigned long is not guaranteed to be 64 bits wide
+    } else {
+        space &= ~(1ull << index);
     }
+    return space;
+}
 
-    // ┌          ┐   ┌   ┐   ┌   ┐ 
-    // │ 0, -1, 0 │   │ x │   │-y │
-    // │ 1,  0, 0 │ * │ y │ = │ x │
-    // │ 0,  0, 1 │   │ z │   │ z │
-    // └          ┘   └   ┘   └   ┘
-    auto newIndexRotZ(Space *space, int x, int y, int z) -> int {
-        return space->dim_x * space->dim_z * (space->dim_y - 1 - y) + space->dim_z * x + z;
-    }
+bool collides(uint64 a, uint64 b) { // parameter types match the declaration in VoxelSpace.h
+    return (a | b) != (a ^ b); // OR and XOR differ exactly when some bit is set in both masks
+}
 
-    auto toggle(uint64_t space, int index) -> uint64_t {
-        space ^= 1ul << index;
-        return space;
-    }
+bool collides(Space *a, Space *b) {
+    return (a->space | b->space) != (a->space ^ b->space);
+}
 
-    auto set(uint64_t space, int index, bool val) -> uint64_t {
-        if (val) {
-            space |= 1ul << index;
-        } else {
-            space &= ~(1ul << index);
-        }
-        return space;
-    }
+bool filledAt(Space *space, int x, int y, int z) { // true when the bit for cell (x, y, z) is set
+    uint64 mask = 1ull << (space->dim_y * space->dim_z * x + space->dim_z * y + z); // 1ull keeps the shift 64-bit
+    return (space->space & mask) != 0ul;
+}
 
-    auto collides(uint64_t a, uint64_t b) -> bool {
-        return (a | b) != (a ^ b);
-    }
+Extrema getExtrema(Space *space) {
+    Extrema extrema = {
+       0,
+       space->dim_x,
+       0,
+       space->dim_y,
+       0,
+       space->dim_z,
+    };
 
-    auto collides(Space *a, Space *b) -> bool {
-        return (a->space | b->space) != (a->space ^ b->space);
-    }
-
-    auto filledAt(Space *space, int x, int y, int z) -> bool {
-        auto mask = 1ul << (space->dim_y * space->dim_z * x + space->dim_z * y + z);
-        return (space->space & mask) != 0ul;
-    }
-
-    auto getExtrema(Space *space) -> Extrema {
-        auto extrema = Extrema{
-           .xMax=0,
-           .xMin=space->dim_x,
-           .yMax=0,
-           .yMin=space->dim_y,
-           .zMax=0,
-           .zMin=space->dim_z,
-        };
-
-        for (int x = 0; x < space->dim_x; x++) {
-            for (int y = 0; y < space->dim_y; y++) {
-                for (int z = 0; z < space->dim_z; z++) {
-                    if (filledAt(space, x, y, z)) {
-                        if (x > extrema.xMax) extrema.xMax = x;
-                        if (x < extrema.xMin) extrema.xMin = x;
-                        if (y > extrema.yMax) extrema.yMax = y;
-                        if (y < extrema.yMin) extrema.yMin = y;
-                        if (z > extrema.zMax) extrema.zMax = z;
-                        if (z < extrema.zMin) extrema.zMin = z;
-                    }
+    for (int x = 0; x < space->dim_x; x++) {
+        for (int y = 0; y < space->dim_y; y++) {
+            for (int z = 0; z < space->dim_z; z++) {
+                if (filledAt(space, x, y, z)) {
+                    if (x > extrema.xMax) extrema.xMax = x;
+                    if (x < extrema.xMin) extrema.xMin = x;
+                    if (y > extrema.yMax) extrema.yMax = y;
+                    if (y < extrema.yMin) extrema.yMin = y;
+                    if (z > extrema.zMax) extrema.zMax = z;
+                    if (z < extrema.zMin) extrema.zMin = z;
                 }
             }
         }
-
-        return extrema;
     }
 
-    auto cullEmptySpace(Space *space) -> void {
-        auto extrema = getExtrema(space);
-        auto space_index = 0;
-        auto newSpace = 0ul;
-        for (int x = extrema.xMin; x <= extrema.xMax; x++) {
-            for (int y = extrema.yMin; y <= extrema.yMax; y++) {
-                for (int z = extrema.zMin; z <= extrema.zMax; z++) {
-                    if (filledAt(space, x, y, z)) {
-                        newSpace |= 1ul << space_index;
-                    }
-                    space_index++;
+    return extrema;
+}
+
+void cullEmptySpace(Space *space) { // shrinks `space` in place to the bounding box given by getExtrema
+    Extrema extrema = getExtrema(space);
+    int space_index = 0; // next bit position in the re-packed mask
+    uint64 newSpace = 0ul;
+    for (int x = extrema.xMin; x <= extrema.xMax; x++) {
+        for (int y = extrema.yMin; y <= extrema.yMax; y++) {
+            for (int z = extrema.zMin; z <= extrema.zMax; z++) {
+                if (filledAt(space, x, y, z)) {
+                    newSpace |= 1ull << space_index; // 1ull: unsigned long is not guaranteed to be 64 bits wide
+                }
+                space_index++; // advances for empty cells too, so bits stay in x, y, z loop order
+            }
+        }
+    }
+    space->dim_x = extrema.xMax - extrema.xMin + 1;
+    space->dim_y = extrema.yMax - extrema.yMin + 1;
+    space->dim_z = extrema.zMax - extrema.zMin + 1;
+    space->space = newSpace;
+}
+
+void rotate90X(Space *space) {
+    uint64 new_space = 0;
+    for (int x = 0; x < space->dim_x; x++) {
+        for (int y = 0; y < space->dim_y; y++) {
+            for (int z = 0; z < space->dim_z; z++) {
+                if (filledAt(space, x, y, z)) {
+                    new_space |= 1ull << newIndexRotX(space, x, y, z); // 64-bit shift: `1 << n` is an int shift, wrong for n >= 31
                 }
             }
         }
-        space->dim_x = extrema.xMax - extrema.xMin + 1;
-        space->dim_y = extrema.yMax - extrema.yMin + 1;
-        space->dim_z = extrema.zMax - extrema.zMin + 1;
-        space->space = newSpace;
     }
+    int temp = space->dim_y;
+    space->dim_y = space->dim_z;
+    space->dim_z = temp;
+    space->space = new_space;
+}
 
-    auto rotate90X(Space *space) -> void {
-        auto new_space = 0ul;
-        for (int x = 0; x < space->dim_x; x++) {
-            for (int y = 0; y < space->dim_y; y++) {
-                for (int z = 0; z < space->dim_z; z++) {
-                    if (filledAt(space, x, y, z)) {
-                        new_space |= 1 << newIndexRotX(space, x, y, z);
-                    }
+void rotate90Y(Space *space) {
+    uint64 new_space = 0;
+    for (int x = 0; x < space->dim_x; x++) {
+        for (int y = 0; y < space->dim_y; y++) {
+            for (int z = 0; z < space->dim_z; z++) {
+                if (filledAt(space, x, y, z)) {
+                    new_space |= 1ull << newIndexRotY(space, x, y, z); // 64-bit shift: `1 << n` is an int shift, wrong for n >= 31
                 }
             }
         }
-        auto temp = space->dim_y;
-        space->dim_y = space->dim_z;
-        space->dim_z = temp;
-        space->space = new_space;
     }
+    int temp = space->dim_x;
+    space->dim_x = space->dim_z;
+    space->dim_z = temp;
+    space->space = new_space;
+}
 
-    auto rotate90Y(Space *space) -> void {
-        auto new_space = 0ul;
-        for (int x = 0; x < space->dim_x; x++) {
-            for (int y = 0; y < space->dim_y; y++) {
-                for (int z = 0; z < space->dim_z; z++) {
-                    if (filledAt(space, x, y, z)) {
-                        new_space |= 1 << newIndexRotY(space, x, y, z);
-                    }
+void rotate90Z(Space *space) {
+    uint64 new_space = 0;
+    for (int x = 0; x < space->dim_x; x++) {
+        for (int y = 0; y < space->dim_y; y++) {
+            for (int z = 0; z < space->dim_z; z++) {
+                if (filledAt(space, x, y, z)) {
+                    new_space |= 1ull << newIndexRotZ(space, x, y, z); // 64-bit shift: `1 << n` is an int shift, wrong for n >= 31
                 }
             }
         }
-        auto temp = space->dim_x;
-        space->dim_x = space->dim_z;
-        space->dim_z = temp;
-        space->space = new_space;
     }
+    int temp = space->dim_x;
+    space->dim_x = space->dim_y;
+    space->dim_y = temp;
+    space->space = new_space;
+}
 
-    auto rotate90Z(Space *space) -> void {
-        auto new_space = 0ul;
-        for (int x = 0; x < space->dim_x; x++) {
-            for (int y = 0; y < space->dim_y; y++) {
-                for (int z = 0; z < space->dim_z; z++) {
-                    if (filledAt(space, x, y, z)) {
-                        new_space |= 1 << newIndexRotZ(space, x, y, z);
-                    }
-                }
+bool isMatch(Space *a, Space *b) {
+    return a->space == b->space 
+        && a->dim_x == b->dim_x
+        && a->dim_y == b->dim_y
+        && a->dim_z == b->dim_z;
+}
+
+void pushNewUniqueSpins(std::vector<Space> *existingSpaces, Space* spaceToSpin) {
+    Space spins[4] = {};
+    spins[0] = *spaceToSpin;
+    for (int i = 0; i < 3; i++) {
+        spins[i + 1] = spins[i];
+        rotate90X(&spins[i + 1]);
+    }
+    for (int i = 0; i < 4; i++) {
+        bool matchFound = false;
+        for (Space &existingSpace : *existingSpaces) {
+            if (isMatch(&existingSpace, &spins[i])) {
+                matchFound = true;
+                break;
             }
         }
-        auto temp = space->dim_x;
-        space->dim_x = space->dim_y;
-        space->dim_y = temp;
-        space->space = new_space;
-    }
-
-    auto isMatch(Space *a, Space *b) -> bool {
-        return a->space == b->space 
-            && a->dim_x == b->dim_x
-            && a->dim_y == b->dim_y
-            && a->dim_z == b->dim_z;
-    }
-
-    auto pushNewUniqueSpins(std::vector<Space> *existingSpaces, Space* spaceToSpin) -> void {
-        Space spins[4] = {};
-        spins[0] = *spaceToSpin;
-        for (int i = 0; i < 3; i++) {
-            spins[i + 1] = spins[i];
-            rotate90X(&spins[i + 1]);
+        if (!matchFound) {
+            existingSpaces->push_back(spins[i]);
         }
-        for (int i = 0; i < 4; i++) {
-            auto matchFound = false;
-            for (auto &existingSpace : *existingSpaces) {
-                if (isMatch(&existingSpace, &spins[i])) {
-                    matchFound = true;
-                    break;
-                }
-            }
-            if (!matchFound) {
-                existingSpaces->push_back(spins[i]);
-            }
-        }
-    }
-
-    auto pushXAxisSpins(std::vector<Space> *existingSpaces, Space* spaceToSpin) -> void {
-        auto refSpace = *spaceToSpin;
-        for (int i = 0; i < 4; i++) {
-            rotate90X(&refSpace);
-            existingSpaces->push_back(refSpace);
-        }
-    }
-
-    auto getUniqueRotations(Space *space) -> std::vector<Space> {
-        auto rotations = std::vector<Space>();
-        rotations.reserve(24);
-        auto refSpace = *space;
-        cullEmptySpace(&refSpace);
-        pushNewUniqueSpins(&rotations, &refSpace);
-        rotate90Y(&refSpace);
-        pushNewUniqueSpins(&rotations, &refSpace);
-        rotate90Y(&refSpace);
-        pushNewUniqueSpins(&rotations, &refSpace);
-        rotate90Y(&refSpace);
-        pushNewUniqueSpins(&rotations, &refSpace);
-        rotate90Z(&refSpace);
-        pushNewUniqueSpins(&rotations, &refSpace);
-        rotate90Z(&refSpace);
-        rotate90Z(&refSpace);
-        pushNewUniqueSpins(&rotations, &refSpace);
-        return rotations;
-    }
-
-    auto getAllRotations(Space *space) -> std::vector<Space> {
-        auto rotations = std::vector<Space>();
-        rotations.reserve(24);
-        auto refSpace = *space;
-        pushXAxisSpins(&rotations, &refSpace);
-        rotate90Y(&refSpace);
-        pushXAxisSpins(&rotations, &refSpace);
-        rotate90Y(&refSpace);
-        pushXAxisSpins(&rotations, &refSpace);
-        rotate90Y(&refSpace);
-        pushXAxisSpins(&rotations, &refSpace);
-        rotate90Z(&refSpace);
-        pushXAxisSpins(&rotations, &refSpace);
-        rotate90Z(&refSpace);
-        rotate90Z(&refSpace);
-        pushXAxisSpins(&rotations, &refSpace);
-        return rotations;
-    }
-
-    auto getAllPositionsInPrism(Space *space, int prism_dims[3]) -> std::vector<uint64_t> {
-        auto cubePositions = std::vector<uint64_t>();
-        if (space->dim_x > prism_dims[0] || space->dim_y > prism_dims[1] || space->dim_z > prism_dims[2]) {
-            return cubePositions;
-        }
-        auto xPositionCount = prism_dims[0] - space->dim_x + 1;
-        auto yPositionCount = prism_dims[1] - space->dim_y + 1;
-        auto zPositionCount = prism_dims[2] - space->dim_z + 1;
-        for (int x = 0; x < xPositionCount; x++) {
-            for (int y = 0; y < yPositionCount; y++) {
-                for (int z = 0; z < zPositionCount; z++) {
-                    auto new_space = 0ul;
-                    for (int posX = 0; posX < space->dim_x; posX++) {
-                        for (int posY = 0; posY < space->dim_y; posY++) {
-                            for (int posZ = 0; posZ < space->dim_z; posZ++) {
-                                auto set_val = filledAt(space, posX, posY, posZ);
-                                auto index_to_set = index(prism_dims[1], prism_dims[2], x + posX, y + posY, z + posZ);
-                                new_space = set(new_space, index_to_set, set_val);
-                            }
-                        }
-                    }
-                    cubePositions.push_back(new_space);
-                }
-            }
-        }
-        return cubePositions;
-    }
-
-    auto getAllPermutationsInPrism(Space *space, int prism_dims[3]) -> std::vector<uint64_t> {
-        auto rotations = getUniqueRotations(space);
-        auto result = std::vector<uint64_t>();
-        for (auto &rotation : rotations) {
-            auto positions = getAllPositionsInPrism(&rotation, prism_dims);
-            result.insert(result.end(), positions.begin(), positions.end());
-        }
-        return result;
-    }
-
-    auto size(uint64_t space) -> int {
-        auto size = 0;
-        for (int i = 0; i < 64; i++) {
-            if ((space & (1ul << i)) != 0) {
-                size++;
-            }
-        }
-        return size;
     }
 }
+
+void pushXAxisSpins(std::vector<Space> *existingSpaces, Space* spaceToSpin) {
+    Space refSpace = *spaceToSpin;
+    for (int i = 0; i < 4; i++) {
+        rotate90X(&refSpace);
+        existingSpaces->push_back(refSpace);
+    }
+}
+
+std::vector<Space> getUniqueRotations(Space *space) {
+    std::vector<Space> rotations = std::vector<Space>();
+    rotations.reserve(24);
+    auto refSpace = *space;
+    cullEmptySpace(&refSpace);
+    pushNewUniqueSpins(&rotations, &refSpace);
+    rotate90Y(&refSpace);
+    pushNewUniqueSpins(&rotations, &refSpace);
+    rotate90Y(&refSpace);
+    pushNewUniqueSpins(&rotations, &refSpace);
+    rotate90Y(&refSpace);
+    pushNewUniqueSpins(&rotations, &refSpace);
+    rotate90Z(&refSpace);
+    pushNewUniqueSpins(&rotations, &refSpace);
+    rotate90Z(&refSpace);
+    rotate90Z(&refSpace);
+    pushNewUniqueSpins(&rotations, &refSpace);
+    return rotations;
+}
+
+std::vector<Space> getAllRotations(Space *space) {
+    std::vector<Space> rotations = {};
+    rotations.reserve(24);
+    Space refSpace = *space;
+    pushXAxisSpins(&rotations, &refSpace);
+    rotate90Y(&refSpace);
+    pushXAxisSpins(&rotations, &refSpace);
+    rotate90Y(&refSpace);
+    pushXAxisSpins(&rotations, &refSpace);
+    rotate90Y(&refSpace);
+    pushXAxisSpins(&rotations, &refSpace);
+    rotate90Z(&refSpace);
+    pushXAxisSpins(&rotations, &refSpace);
+    rotate90Z(&refSpace);
+    rotate90Z(&refSpace);
+    pushXAxisSpins(&rotations, &refSpace);
+    return rotations;
+}
+
+std::vector<uint64> getAllPositionsInPrism(Space *space, int prism_dims[3]) {
+    std::vector<uint64> cubePositions = {};
+    if (space->dim_x > prism_dims[0] || space->dim_y > prism_dims[1] || space->dim_z > prism_dims[2]) {
+        return cubePositions;
+    }
+    int xPositionCount = prism_dims[0] - space->dim_x + 1;
+    int yPositionCount = prism_dims[1] - space->dim_y + 1;
+    int zPositionCount = prism_dims[2] - space->dim_z + 1;
+    for (int x = 0; x < xPositionCount; x++) {
+        for (int y = 0; y < yPositionCount; y++) {
+            for (int z = 0; z < zPositionCount; z++) {
+                uint64 new_space = 0;
+                for (int posX = 0; posX < space->dim_x; posX++) {
+                    for (int posY = 0; posY < space->dim_y; posY++) {
+                        for (int posZ = 0; posZ < space->dim_z; posZ++) {
+                            bool set_val = filledAt(space, posX, posY, posZ);
+                            int index_to_set = index(prism_dims[1], prism_dims[2], x + posX, y + posY, z + posZ);
+                            new_space = set(new_space, index_to_set, set_val);
+                        }
+                    }
+                }
+                cubePositions.push_back(new_space);
+            }
+        }
+    }
+    return cubePositions;
+}
+
+std::vector<uint64> getAllPermutationsInPrism(Space *space, int prism_dims[3]) { // placements of every unique rotation
+    std::vector<Space> rotations = getUniqueRotations(space);
+    std::vector<uint64> result = std::vector<uint64>(); // element type spelled out: a bare `std::vector()` cannot be deduced
+    for (auto &rotation : rotations) {
+        auto positions = getAllPositionsInPrism(&rotation, prism_dims);
+        result.insert(result.end(), positions.begin(), positions.end()); // append this rotation's placements
+    }
+    return result;
+}
+
+int size(uint64_t space) { // counts the set bits among the low 64 bits of `space`
+    int size = 0;
+    for (int i = 0; i < 64; i++) {
+        if ((space & (1ull << i)) != 0) { // 1ull: shifting a 32-bit unsigned long by i >= 32 is undefined
+            size++;
+        }
+    }
+    return size;
+}
diff --git a/src/VoxelSpace.h b/src/VoxelSpace.h
index d2e2e7b..d69775a 100644
--- a/src/VoxelSpace.h
+++ b/src/VoxelSpace.h
@@ -2,67 +2,65 @@
 #define VOXELSPACE_H
 
 #include <vector>
-#include <cstdint>
+#include "lib/djstdlib/core.h"
 
-namespace Voxel {
-    constexpr int NUM_ROTS_3D = 24;
+constexpr int NUM_ROTS_3D = 24;
 
-    struct Extrema {
-       int xMax;
-       int xMin;
-       int yMax;
-       int yMin;
-       int zMax;
-       int zMin;
-    };
+struct Extrema { // A max/min pair of ints for each of x, y and z; the return type of getExtrema(Space *).
+   int xMax;
+   int xMin;
+   int yMax;
+   int yMin;
+   int zMax;
+   int zMin;
+};
 
-    struct Space {
-        uint64_t space;
-        int dim_x;
-        int dim_y;
-        int dim_z;
-    };
+struct Space { // A 64-bit value together with the three dimensions it is interpreted with.
+    uint64 space; // NOTE(review): presumably one occupancy bit per cell (see set() / filledAt()) - confirm against VoxelSpace.cpp
+    int dim_x;
+    int dim_y;
+    int dim_z;
+};
 
-    auto newIndexRotX(Space *space, int x, int y, int z) -> int;
+int newIndexRotX(Space *space, int x, int y, int z);
 
-    auto newIndexRotY(Space *space, int x, int y, int z) -> int;
+int newIndexRotY(Space *space, int x, int y, int z);
 
-    auto newIndexRotZ(Space *space, int x, int y, int z) -> int;
+int newIndexRotZ(Space *space, int x, int y, int z);
 
-    auto toggle(uint64_t space, int index) -> uint64_t;
+uint64 toggle(uint64 space, int index);
 
-    auto set(uint64_t space, int index, bool val) -> uint64_t;
+uint64 set(uint64 space, int index, bool val);
 
-    auto collides(Space *a, Space *b) -> bool;
-    auto collides(uint64_t a, uint64_t b) -> bool;
+bool collides(Space *a, Space *b);
+bool collides(uint64 a, uint64 b);
 
-    auto add(Space *a, Space *b) -> Space;
+Space add(Space *a, Space *b);
 
-    auto filledAt(Space *space, int x, int y, int z) -> bool;
+bool filledAt(Space *space, int x, int y, int z);
 
-    auto getExtrema(Space *space) -> Extrema;
+Extrema getExtrema(Space *space);
 
-    auto cullEmptySpace(Space *space) -> void;
+void cullEmptySpace(Space *space);
 
-    auto isMatch(Space *a, Space *b) -> bool;
+bool isMatch(Space *a, Space *b);
 
-    auto rotate90X(Space *space) -> void;
+void rotate90X(Space *space);
 
-    auto rotate90Y(Space *space) -> void;
+void rotate90Y(Space *space);
 
-    auto rotate90Z(Space *space) -> void;
+void rotate90Z(Space *space);
 
-    auto pushNewUniqueSpins(std::vector<Space> *existingSpaces, Space* spaceToSpin) -> void;
+void pushNewUniqueSpins(std::vector<Space> *existingSpaces, Space* spaceToSpin);
 
-    auto getUniqueRotations(Space *space) -> std::vector<Space>;
+std::vector<Space> getUniqueRotations(Space *space);
 
-    auto getAllRotations(Space *space) -> std::vector<Space>;
+std::vector<Space> getAllRotations(Space *space);
 
-    auto getAllPositionsInPrism(Space *space, int prism_dims[3]) -> std::vector<uint64_t>;
+std::vector<uint64> getAllPositionsInPrism(Space *space, int prism_dims[3]);
 
-    auto getAllPermutationsInPrism(Space *space, int prism_dims[3]) -> std::vector<uint64_t>;
+std::vector<uint64> getAllPermutationsInPrism(Space *space, int prism_dims[3]);
 
-    auto size(uint64_t space) -> int;
-}
+int size(uint64 space);
 
 #endif 
diff --git a/src/c.zig b/src/c.zig
deleted file mode 100644
index b2aa51f..0000000
--- a/src/c.zig
+++ /dev/null
@@ -1,9 +0,0 @@
-pub usingnamespace @cImport({
-    @cInclude("glad/glad.h");
-    @cInclude("GLFW/glfw3.h");
-
-    @cDefine("STB_IMAGE_IMPLEMENTATION", "");
-    @cDefine("TINYOBJ_LOADER_C_IMPLEMENTATION", "");
-    @cInclude("loaders/stb_image.h");
-    @cInclude("loaders/tinyobj.h");
-});
diff --git a/src/gfx/Color.cpp b/src/gfx/Color.cpp
index b408234..73e32ac 100644
--- a/src/gfx/Color.cpp
+++ b/src/gfx/Color.cpp
@@ -1,11 +1,8 @@
-#include <cstdint>
 #include <glm/ext/vector_float3.hpp>
-#include <string>
 #include <math.h>
-#include <iostream>
-#include "Color.h"
+#include "../lib/djstdlib/core.h"
 
-auto hue_to_rgb(float p, float q, float t) -> float {
+real32 hue_to_rgb(float p, float q, float t) {
     if (t < 0) {
         t += 1;
     } else if (t > 1) {
@@ -17,7 +14,7 @@ auto hue_to_rgb(float p, float q, float t) -> float {
     return p;
 };
 
-auto hsl_to_hex(float h, float s, float l) -> glm::vec3 {
+glm::vec3 hsl_to_hex(real32 h, real32 s, real32 l) {
     h /= 360;
     s /= 100;
     l /= 100;
@@ -34,7 +31,7 @@ auto hsl_to_hex(float h, float s, float l) -> glm::vec3 {
     return glm::vec3(r, g, b);
 }
 
-auto Color::color_from_index(int index) -> glm::vec3 {
+glm::vec3 color_from_index(int index) {
     auto color_wheel_cycle = floorf(index / 6.0f);
     auto darkness_cycle = floorf(index / 12.0f);
     auto spacing = (360.0f / 6.0f);
diff --git a/src/gfx/Color.h b/src/gfx/Color.h
index 60781bb..1bca765 100644
--- a/src/gfx/Color.h
+++ b/src/gfx/Color.h
@@ -1,5 +1,3 @@
 #include <glm/ext/vector_float3.hpp>
 
-namespace Color {
-    auto color_from_index(int index) -> glm::vec3; 
-};
+glm::vec3 color_from_index(int index); 
diff --git a/src/gfx/Color.zig b/src/gfx/Color.zig
deleted file mode 100644
index 2491d4c..0000000
--- a/src/gfx/Color.zig
+++ /dev/null
@@ -1,43 +0,0 @@
-fn hue_to_rgb(p: f32, q: f32, t: f32) f32 {
-    if (t < 0) {
-        t += 1;
-    } else if (t > 1) {
-        t -= 1;
-    }
-    if (t < 1.0 / 6) return p + (q - p) * 6 * t;
-    if (t < 1.0 / 2) return q;
-    if (t < 2.0 / 3) return p + (q - p) * (2.0 / 3 - t) * 6;
-    return p;
-}
-
-fn hsl_to_hex(h: f32, s: f32, l: f32) @Vector(3, f32) {
-    h /= 360;
-    s /= 100;
-    l /= 100;
-    const r: f32;
-    const g: f32;
-    const b: f32;
-    if (s == 0) {
-        r = l;
-        g = l;
-        b = l;
-    } else {
-        const q = if (l < 0.5) l * (1 + s) else l + s - l * s;
-        const p = 2 * l - q;
-        r = hue_to_rgb(p, q, h + 1.0 / 3);
-        g = hue_to_rgb(p, q, h);
-        b = hue_to_rgb(p, q, h - 1.0 / 3);
-    }
-    return @Vector(3, f32){ r, g, b };
-}
-
-pub fn color_from_index(index: i32) @Vector(3, f32) {
-    const color_wheel_cycle = @floor(index / 6.0);
-    const darkness_cycle = @floor(index / 12.0);
-    const spacing = (360.0 / 6.0);
-    const offset = if (color_wheel_cycle == 0) 0 else spacing / (color_wheel_cycle + 2);
-    const hue = spacing * (index % 6) + offset;
-    const saturation = 100.0f;
-    const lightness = 1.0f / (2 + darkness_cycle) * 100;
-    return hsl_to_hex(hue, saturation, lightness);
-}
diff --git a/src/gfx/Mesh.cpp b/src/gfx/Mesh.cpp
index 4d5daa2..c9f6ab4 100644
--- a/src/gfx/Mesh.cpp
+++ b/src/gfx/Mesh.cpp
@@ -1,6 +1,6 @@
 #include <iostream>
 #include "Mesh.h"
-#include "loaders/tinyobj.h"
+#include "../lib/loaders/tinyobj.h"
 
 auto Mesh::init(const char* obj_file) -> void {
     auto reader = tinyobj::ObjReader();
diff --git a/src/gfx/Mesh.h b/src/gfx/Mesh.h
index 5a435bf..c8a0c7b 100644
--- a/src/gfx/Mesh.h
+++ b/src/gfx/Mesh.h
@@ -1,7 +1,7 @@
 #ifndef LEDDA_MESH_H
 #define LEDDA_MESH_H
 
-#include "glad/glad.h"
+#include "../lib/glad/glad.h"
 #include "geometry.h"
 
 struct Mesh {
@@ -11,8 +11,8 @@ struct Mesh {
     unsigned int vbo_norm;
     unsigned int ebo;
     unsigned int num_indices;
-    auto init(const char* obj_file) -> void;
-    auto init(const LeddaGeometry::Shape* shape) -> void;
+    void init(const char* obj_file);
+    void init(const Shape* shape);
 };
 
 #endif
diff --git a/src/gfx/Mesh.zig b/src/gfx/Mesh.zig
deleted file mode 100644
index bac0bf5..0000000
--- a/src/gfx/Mesh.zig
+++ /dev/null
@@ -1,94 +0,0 @@
-const std = @import("std");
-const c = @import("../c.zig");
-const djleddaGeom = @import("djleddaGeom.zig");
-
-pub const Mesh = struct {
-    vao: c_uint,
-    vbo_xyz: c_uint,
-    vbo_uv: c_uint,
-    vbo_norm: c_uint,
-    ebo: c_uint,
-    num_indices: c_uint,
-
-    pub fn from_shape(shape: *const djleddaGeom.Shape) void {
-        const mesh = Mesh{};
-        mesh.num_indices = shape.indices.len;
-        c.glGenVertexArrays(1, &mesh.vao);
-        c.glGenBuffers(1, &mesh.vbo_xyz);
-        c.glGenBuffers(1, &mesh.vbo_uv);
-        c.glGenBuffers(1, &mesh.ebo);
-
-        c.glBindVertexArray(mesh.vao);
-
-        c.glBindBuffer(c.GL_ARRAY_BUFFER, mesh.vbo_xyz);
-        c.glBufferData(c.GL_ARRAY_BUFFER, shape.xyz.ptr * @sizeOf(float), shape.xyz, c.GL_STATIC_DRAW);
-        c.glVertexAttribPointer(0, 3, c.GL_FLOAT, c.GL_FALSE, 3 * @sizeOf(f32), @as(*void, 0));
-        c.glEnableVertexAttribArray(0);
-
-        c.glBindBuffer(c.GL_ARRAY_BUFFER, mesh.vbo_uv);
-        c.glBufferData(c.GL_ARRAY_BUFFER, shape.uv.ptr * @sizeOf(f32), shape.uv, c.GL_STATIC_DRAW);
-        c.glVertexAttribPointer(1, 2, c.GL_FLOAT, c.GL_FALSE, 2 * @sizeOf(f32), @as(*void, 0));
-        c.glEnableVertexAttribArray(1);
-
-        c.glBindBuffer(c.GL_ELEMENT_ARRAY_BUFFER, mesh.ebo);
-        c.glBufferData(c.GL_ELEMENT_ARRAY_BUFFER, shape.indices.len * @sizeOf(c_uint), shape.indices.ptr, c.GL_STATIC_DRAW);
-    }
-
-//    pub fn init(obj_file: *[]const u8) void {
-//        const reader = c.tinyobj.ObjReader();
-//        const success = reader.ParseFromFile(obj_file);
-//        std.debug.print("{}\n", .{reader.Error()});
-//
-//        const attrib = reader.GetAttrib();
-//
-//        const indices_t = reader.GetShapes().at(0).mesh.indices;
-//        const indices = ArrayList(c_uint)(indices_t.size());
-//
-//        const vertices = ArrayList()(3*indices_t.size());
-//        const normals = ArrayList()(3*indices_t.size());
-//        const texcoords = ArrayList()(2*indices_t.size());
-//
-//        for (int i = 0; i < indices_t.size(); i++) {
-//            const vertex_data = indices_t[i];
-//            vertices[3*i] = attrib.vertices[3*vertex_data.vertex_index];
-//            vertices[3*i+1] = attrib.vertices[3*vertex_data.vertex_index + 1];
-//            vertices[3*i+2] = attrib.vertices[3*vertex_data.vertex_index + 2];
-//
-//            normals[3*i] = attrib.normals[3*vertex_data.normal_index];
-//            normals[3*i+1] = attrib.normals[3*vertex_data.normal_index + 1];
-//            normals[3*i+2] = attrib.normals[3*vertex_data.normal_index + 2];
-//
-//            texcoords[2*i] = attrib.texcoords[2*vertex_data.texcoord_index];
-//            texcoords[2*i+1] = attrib.texcoords[2*vertex_data.texcoord_index + 1];
-//
-//            indices[i] = i;
-//        }
-//
-//        num_indices = indices_t.size();
-//        glGenVertexArrays(1, &vao);
-//        glGenBuffers(1, &vbo_xyz);
-//        glGenBuffers(1, &vbo_uv);
-//        glGenBuffers(1, &vbo_norm);
-//        //glGenBuffers(1, &ebo);
-//
-//        glBindVertexArray(vao);
-//
-//        glBindBuffer(GL_ARRAY_BUFFER, vbo_xyz);
-//        glBufferData(GL_ARRAY_BUFFER, vertices.size() * sizeof(float), vertices.data(), GL_STATIC_DRAW);
-//        glVertexAttribPointer(0, 3, GL_FLOAT, GL_FALSE, 3 * sizeof(float), (void*)0);
-//        glEnableVertexAttribArray(0);
-//
-//        glBindBuffer(GL_ARRAY_BUFFER, vbo_uv);
-//        glBufferData(GL_ARRAY_BUFFER, texcoords.size() * sizeof(float), texcoords.data(), GL_STATIC_DRAW);
-//        glVertexAttribPointer(1, 2, GL_FLOAT, GL_FALSE, 2 * sizeof(float), (void*)0);
-//        glEnableVertexAttribArray(1);
-//
-//        glBindBuffer(GL_ARRAY_BUFFER, vbo_norm);
-//        glBufferData(GL_ARRAY_BUFFER, normals.size() * sizeof(float), normals.data(), GL_STATIC_DRAW);
-//        glVertexAttribPointer(2, 3, GL_FLOAT, GL_FALSE, 3 * sizeof(float), (void*)0);
-//        glEnableVertexAttribArray(2);
-//
-//        //glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, ebo);
-//        //glBufferData(GL_ELEMENT_ARRAY_BUFFER, indices.size() * sizeof(unsigned int), indices.data(), GL_STATIC_DRAW);
-//    }
-};
diff --git a/src/gfx/OrbitControls.cpp b/src/gfx/OrbitControls.cpp
deleted file mode 100644
index e69de29..0000000
diff --git a/src/gfx/OrbitControls.h b/src/gfx/OrbitControls.h
deleted file mode 100644
index c4e7f7d..0000000
--- a/src/gfx/OrbitControls.h
+++ /dev/null
@@ -1,81 +0,0 @@
-#ifndef ORBIT_CONTROLS_H
-#define ORBIT_CONTROLS_H
-
-#include "glad/glad.h"
-#include <GLFW/glfw3.h>
-#include <glm/glm.hpp>
-#include <glm/gtc/type_ptr.hpp>
-#include <glm/gtc/matrix_transform.hpp>
-#include "loaders/stb_image.h"
-
-constexpr auto ROTATION_FACTOR = 1.0f / 200.0f;
-
-struct Point {
-    float x;
-    float y;
-};
-
-class OrbitControls {
-private:
-    bool dragging;
-    bool hovered;
-    bool scrolling;
-    bool flyingEnabled;
-    float lastX;
-    float lastY;
-    Point lastScroll1;
-    Point lastScroll2;
-    glm::vec3 y_axis;
-    glm::vec3 x_axis;
-    glm::vec3 start;
-    Entity* orbited_object;
-
-    OrbitControls(Entity* orbited, Camera* camera) {
-        camera = camera;
-        orbited_object = orbited;
-        y_axis = orbited_object.worldToLocal(camera.up);
-        x_axis = orbited_object.position.sub(camera.position);
-        x_axis /= sqrt(pow(x_axis.x) + pow(x_axis.y, 2) + pow(x_axis.z, 2));
-        x_axis = glm::cross(x_axis, y_axis);
-        start = orbited_object.rotation;
-
-        this.element.addEventListener('wheel', (ev) => this.handleScroll(ev));
-        this.element.addEventListener('mouseover', () => this.hovered = true);
-        this.element.addEventListener('mouseout', () => this.hovered = false);
-        this.element.addEventListener('mousedown', (ev) => this.handleMouseDown(ev));
-        window.addEventListener('mousemove', (ev) => this.handleMove(ev));
-        window.addEventListener('mouseup', () => this.dragging = false);
-    }
-
-    on_mouse_down(event) {
-        if (event.button === 1) {
-            this.object.setRotationFromEuler(this.start);
-        }
-        if (!this.dragging) {
-            this.lastX = event.x;
-            this.lastY = event.y;
-            this.dragging = true;
-        }
-    }
-
-    on_mouse_move(event) {
-        if (dragging) {
-            auto x_diff = event.movementX * ROTATION_FACTOR;
-            auto y_diff = event.movementY * ROTATION_FACTOR;
-            glm::rotate(&orbited_object, x_diff, &y_axis);
-            //rotate on world axis ??? 
-            glm::rotate(&orbited_object, y_diff &x_axis);
-        }
-    }
-
-    on_scroll(event) {
-        if (this.flyingEnabled && this.hovered) {
-            for (const fliable of this.fliables) {
-                const direction = event.deltaY / Math.abs(event.deltaY);
-                fliable.flyBy(direction / 10);
-            }
-        }
-    }
-}
-
-#endif
diff --git a/src/gfx/Shader.cpp b/src/gfx/Shader.cpp
index eed0e28..19c128b 100644
--- a/src/gfx/Shader.cpp
+++ b/src/gfx/Shader.cpp
@@ -1,43 +1,42 @@
-#include "glad/glad.h"
-#include <array>
-#include <string>
 #include <fstream>
 #include <sstream>
 #include <iostream>
 #include "Shader.h"
+#include "../lib/djstdlib/core.h"
+#include "../lib/glad/glad.h"
 
 enum ShaderType {
     fragment=GL_FRAGMENT_SHADER,
     vertex=GL_VERTEX_SHADER,
 };
 
-auto create_shader(const char* file_path, ShaderType shader_type, char* info_log) -> unsigned int {
+uint32 create_shader(const char* file_path, ShaderType shader_type, char* info_log) {
     std::stringstream shader_stream;
     std::ifstream shader_file;
     shader_file.open(file_path);
     shader_stream << shader_file.rdbuf();
     shader_file.close();
-    auto shader_string = shader_stream.str();
-    const auto shader_code = shader_string.c_str();
+    std::string string = shader_stream.str();
+    const char *shader_code = string.c_str();
 
-    auto vertex_shader = glCreateShader(shader_type);
+    GLuint vertex_shader = glCreateShader(shader_type);
     glShaderSource(vertex_shader, 1, &shader_code, NULL);
     glCompileShader(vertex_shader);
     int success;
     glGetShaderiv(vertex_shader, GL_COMPILE_STATUS, &success);
     if (!success) {
         glGetShaderInfoLog(vertex_shader, 512, NULL, info_log);
-        auto shader_type_name = shader_type == ShaderType::fragment ? "FRAGMENT" : "VERTEX";
+        const char* shader_type_name = shader_type == ShaderType::fragment ? "FRAGMENT" : "VERTEX";
         std::cout << "ERROR::SHADER::" << shader_type_name << "::COMPILATION_FAILED\n" << info_log << std::endl;
     }
 
     return vertex_shader;
 }
 
-auto Shader::init(const char* vertex_path, const char* fragment_path) -> void {
-    auto info_log = std::array<char, 512>();
-    auto vertex_shader = create_shader(vertex_path, ShaderType::vertex, info_log.data());
-    auto fragment_shader = create_shader(fragment_path, ShaderType::fragment, info_log.data());
+void Shader::init(const char* vertex_path, const char* fragment_path) {
+    char info_log[512] = {0};
+    uint32 vertex_shader = create_shader(vertex_path, ShaderType::vertex, info_log);
+    uint32 fragment_shader = create_shader(fragment_path, ShaderType::fragment, info_log);
 
     prog_id = glCreateProgram();
     glAttachShader(prog_id, vertex_shader);
@@ -47,8 +46,8 @@ auto Shader::init(const char* vertex_path, const char* fragment_path) -> void {
     int success;
     glGetProgramiv(prog_id, GL_LINK_STATUS, &success);
     if (!success) {
-        glGetProgramInfoLog(prog_id, 512, NULL, info_log.data());
-        std::cout << "ERROR::SHADER::PROGRAM::LINK_FAILED\n" << info_log.data() << std::endl;
+        glGetProgramInfoLog(prog_id, 512, NULL, info_log);
+        std::cout << "ERROR::SHADER::PROGRAM::LINK_FAILED\n" << info_log << std::endl;
     }
 
     glDeleteShader(vertex_shader);
diff --git a/src/gfx/Shader.h b/src/gfx/Shader.h
index 7e02dd6..2245035 100644
--- a/src/gfx/Shader.h
+++ b/src/gfx/Shader.h
@@ -3,7 +3,7 @@
 
 struct Shader {
     unsigned int prog_id;
-    auto init(const char* vertex_path, const char* fragment_path) -> void;
+    void init(const char* vertex_path, const char* fragment_path);
 };
 
 #endif
diff --git a/src/gfx/Shader.zig b/src/gfx/Shader.zig
deleted file mode 100644
index 26bcce5..0000000
--- a/src/gfx/Shader.zig
+++ /dev/null
@@ -1,56 +0,0 @@
-const c = @import("../c.zig");
-const std = @import("std");
-
-const ShaderType = enum(u32) {
-    fragment = c.GL_FRAGMENT_SHADER,
-    vertex = c.GL_VERTEX_SHADER,
-};
-
-fn create_shader(file_path: []const u8, shader_type: ShaderType, info_log: *[]const u8, allocator: *std.mem.Allocator) c_uint {
-    const file = try std.fs.openFileAbsolute(file_path);
-
-    const file_reader = file.reader(file);
-    const shader_code = std.ArrayList(u8);
-    shader_code.initCapacity(allocator, 1024);
-    defer allocator.free(shader_code);
-
-    file_reader.readAllArrayList(shader_code, 1024 * 1024);
-
-    const vertex_shader = c.glCreateShader(shader_type);
-    c.glShaderSource(vertex_shader, 1, &shader_code.items, c.NULL);
-    c.glCompileShader(vertex_shader);
-    const success: i32 = undefined;
-    c.glGetShaderiv(vertex_shader, c.GL_COMPILE_STATUS, &success);
-    if (success != 0) {
-        c.glGetShaderInfoLog(vertex_shader, 512, c.NULL, info_log);
-        const shader_type_name = if (shader_type == ShaderType.fragment) "FRAGMENT" else "VERTEX";
-        std.debug.print("ERROR::SHADER::{}::COMPILATION_FAILED\n{}\n", .{ shader_type_name, info_log });
-    }
-
-    return vertex_shader;
-}
-
-const Shader = struct {
-    prog_id: c_uint,
-
-    pub fn init(self: Shader, vertex_path: *[]const u8, fragment_path: *[]const u8, allocator: *std.mem.Allocator) void {
-        const info_log = [512]u8{};
-        const vertex_shader = create_shader(vertex_path, ShaderType.vertex, &info_log, allocator);
-        const fragment_shader = create_shader(fragment_path, ShaderType.fragment, &info_log, allocator);
-
-        self.prog_id = c.glCreateProgram();
-        c.glAttachShader(self.prog_id, vertex_shader);
-        c.glAttachShader(self.prog_id, fragment_shader);
-        c.glLinkProgram(self.prog_id);
-
-        const success: c_uint = undefined;
-        c.glGetProgramiv(self.prog_id, c.GL_LINK_STATUS, &success);
-        if (!success) {
-            c.glGetProgramInfoLog(self.prog_id, 512, c.NULL, &info_log);
-            std.debug.print("ERROR::SHADER::PROGRAM::LINK_FAILED\n{}\n", .{info_log});
-        }
-
-        c.glDeleteShader(vertex_shader);
-        c.glDeleteShader(fragment_shader);
-    }
-};
diff --git a/src/gfx/Texture.cpp b/src/gfx/Texture.cpp
index 12af1a7..35412b5 100644
--- a/src/gfx/Texture.cpp
+++ b/src/gfx/Texture.cpp
@@ -1,9 +1,9 @@
 #include "Texture.h"
 #include <iostream>
-#include "loaders/stb_image.h"
-#include "glad/glad.h"
+#include "../lib/loaders/stb_image.h"
+#include "../lib/glad/glad.h"
 
-auto Texture::init(const char* source_path) -> void {
+void Texture::init(const char* source_path) {
     glGenTextures(1, &tex_id);
     glBindTexture(GL_TEXTURE_2D, tex_id);
     glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT);	
@@ -12,7 +12,7 @@ auto Texture::init(const char* source_path) -> void {
     glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
 
     int nr_channels;
-    auto data = stbi_load(source_path, &width, &height, &nr_channels, 0);
+    stbi_uc *data = stbi_load(source_path, &width, &height, &nr_channels, 0);
     if (data) {
         glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB, width, height, 0, GL_RGB, GL_UNSIGNED_BYTE, data);
         glGenerateMipmap(GL_TEXTURE_2D);
diff --git a/src/gfx/Texture.h b/src/gfx/Texture.h
index 19eb7be..2c915c4 100644
--- a/src/gfx/Texture.h
+++ b/src/gfx/Texture.h
@@ -5,7 +5,7 @@ struct Texture {
     unsigned int tex_id;
     int width;
     int height;
-    auto init(const char* source_path) -> void;
+    void init(const char* source_path);
 };
 
 #endif
diff --git a/src/gfx/djleddaGeom.zig b/src/gfx/djleddaGeom.zig
deleted file mode 100644
index eb95d8e..0000000
--- a/src/gfx/djleddaGeom.zig
+++ /dev/null
@@ -1,57 +0,0 @@
-// Buffer layout:
-// X, Y, Z, U, V
-
-pub const Shape = struct {
-    indices: []c_uint,
-    uv: []f32,
-    xyz: []f32,
-};
-
-const triangle_vertices = []f32{
-    -0.5, -0.5, 0.0, 1.0, 1.0,
-    0.5,  -0.5, 0.0, 0.5, 0.5,
-    0.0,  0.5,  0.0, 0.0, 0.0,
-};
-
-const triangle_indices = []c_uint{ 0, 1, 2 };
-
-const cube_vertices = []f32{ -0.5, -0.5, -0.5, 0.0, 0.0, 0.5, -0.5, -0.5, 1.0, 0.0, 0.5, 0.5, -0.5, 1.0, 1.0, 0.5, 0.5, -0.5, 1.0, 1.0, -0.5, 0.5, -0.5, 0.0, 1.0, -0.5, -0.5, -0.5, 0.0, 0.0, -0.5, -0.5, 0.5, 0.0, 0.0, 0.5, -0.5, 0.5, 1.0, 0.0, 0.5, 0.5, 0.5, 1.0, 1.0, 0.5, 0.5, 0.5, 1.0, 1.0, -0.5, 0.5, 0.5, 0.0, 1.0, -0.5, -0.5, 0.5, 0.0, 0.0, -0.5, 0.5, 0.5, 1.0, 0.0, -0.5, 0.5, -0.5, 1.0, 1.0, -0.5, -0.5, -0.5, 0.0, 1.0, -0.5, -0.5, -0.5, 0.0, 1.0, -0.5, -0.5, 0.5, 0.0, 0.0, -0.5, 0.5, 0.5, 1.0, 0.0, 0.5, 0.5, 0.5, 1.0, 0.0, 0.5, 0.5, -0.5, 1.0, 1.0, 0.5, -0.5, -0.5, 0.0, 1.0, 0.5, -0.5, -0.5, 0.0, 1.0, 0.5, -0.5, 0.5, 0.0, 0.0, 0.5, 0.5, 0.5, 1.0, 0.0, -0.5, -0.5, -0.5, 0.0, 1.0, 0.5, -0.5, -0.5, 1.0, 1.0, 0.5, -0.5, 0.5, 1.0, 0.0, 0.5, -0.5, 0.5, 1.0, 0.0, -0.5, -0.5, 0.5, 0.0, 0.0, -0.5, -0.5, -0.5, 0.0, 1.0, -0.5, 0.5, -0.5, 0.0, 1.0, 0.5, 0.5, -0.5, 1.0, 1.0, 0.5, 0.5, 0.5, 1.0, 0.0, 0.5, 0.5, 0.5, 1.0, 0.0, -0.5, 0.5, 0.5, 0.0, 0.0, -0.5, 0.5, -0.5, 0.0, 1.0 };
-
-const cube_indices = []c_uint{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31 };
-
-const square_xyz = []f32{
-    0.5,  0.5,  0.0,
-    0.5,  -0.5, 0.0,
-    -0.5, -0.5, 0.0,
-    -0.5, 0.5,  0.0,
-};
-
-const square_uv = []f32{
-    1.0, 1.0,
-    1.0, 0.0,
-    0.0, 0.0,
-    0.0, 1.0,
-};
-
-const square_indices = []c_uint{
-    0, 1, 3,
-    1, 2, 3,
-};
-
-pub const TRIANGLE = Shape{
-    .indices = triangle_indices,
-    .uv = triangle_vertices,
-    .xyz = triangle_vertices,
-};
-
-pub const SQUARE = Shape{
-    .indices = square_indices,
-    .uv = square_uv,
-    .xyz = square_xyz,
-};
-
-pub const CUBE = Shape{
-    .indices = cube_indices,
-    .uv = triangle_vertices,
-    .xyz = triangle_vertices,
-};
diff --git a/src/gfx/geometry.cpp b/src/gfx/geometry.cpp
index d8900da..79e06d4 100644
--- a/src/gfx/geometry.cpp
+++ b/src/gfx/geometry.cpp
@@ -1,20 +1,20 @@
-#include <array>
 #include "geometry.h"
+#include "../lib/djstdlib/core.h"
 
 // Buffer layout:
 // X, Y, Z, U, V
 
-auto triangle_vertices = std::to_array<float>({
+real32 triangle_vertices[] = {
     -0.5f, -0.5f, 0.0f, 1.0f, 1.0f, 
      0.5f, -0.5f, 0.0f, 0.5f, 0.5f,
      0.0f,  0.5f, 0.0f, 0.0f, 0.0f,
-});
+};
 
-auto triangle_indices = std::to_array<unsigned int>({
+uint32 triangle_indices[] = {
     0, 1, 2
-});
+};
 
-auto cube_vertices = std::to_array<float>({
+real32 cube_vertices[] = {
     -0.5f, -0.5f, -0.5f,  0.0f, 0.0f,
      0.5f, -0.5f, -0.5f,  1.0f, 0.0f,
      0.5f,  0.5f, -0.5f,  1.0f, 1.0f,
@@ -56,57 +56,55 @@ auto cube_vertices = std::to_array<float>({
      0.5f,  0.5f,  0.5f,  1.0f, 0.0f,
     -0.5f,  0.5f,  0.5f,  0.0f, 0.0f,
     -0.5f,  0.5f, -0.5f,  0.0f, 1.0f
-});
+};
 
-auto cube_indices = std::to_array<unsigned int>({ 
+uint32 cube_indices[] = { 
     0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 
     17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31 
-});
+};
 
-auto square_xyz = std::to_array<float>({
+real32 square_xyz[] = {
      0.5f,  0.5f, 0.0f,
      0.5f, -0.5f, 0.0f,
     -0.5f, -0.5f, 0.0f,
     -0.5f,  0.5f, 0.0f,
-}); 
+}; 
 
-auto square_uv = std::to_array<float>({
+real32 square_uv[] = {
     1.0f, 1.0f, 
     1.0f, 0.0f,
     0.0f, 0.0f,
     0.0f, 1.0f,
-}); 
+}; 
 
-auto square_indices = std::to_array<unsigned int>({
+uint32 square_indices[] = {
     0, 1, 3, 
     1, 2, 3,
-});
+};
 
-namespace LeddaGeometry {
-    const Shape TRIANGLE = {
-        .indices = triangle_indices.data(),
-        .indices_size = sizeof(triangle_indices),
-        .uv = triangle_vertices.data(),
-        .uv_size = sizeof(triangle_vertices),
-        .xyz = triangle_vertices.data(),
-        .xyz_size = sizeof(triangle_vertices),
-    };
+const Shape TRIANGLE = { // Shape built from triangle_indices and triangle_vertices.
+    .indices = triangle_indices,
+    .indices_size = ArrayCount(triangle_indices), // NOTE(review): ArrayCount presumably yields an element count - confirm consumers of *_size expect counts, not bytes
+    .uv = triangle_vertices, // uv and xyz both point at the same interleaved X, Y, Z, U, V array
+    .uv_size = ArrayCount(triangle_vertices),
+    .xyz = triangle_vertices,
+    .xyz_size = ArrayCount(triangle_vertices),
+};
 
-    const Shape SQUARE = {
-        .indices = square_indices.data(),
-        .indices_size = square_indices.size(),
-        .uv = square_uv.data(),
-        .uv_size = square_uv.size(),
-        .xyz = square_xyz.data(),
-        .xyz_size = square_xyz.size(),
-    };
+const Shape SQUARE = { // Shape built from square_indices with separate square_uv and square_xyz arrays.
+    .indices = square_indices,
+    .indices_size = ArrayCount(square_indices), // NOTE(review): ArrayCount presumably yields an element count - confirm
+    .uv = square_uv,
+    .uv_size = ArrayCount(square_uv),
+    .xyz = square_xyz,
+    .xyz_size = ArrayCount(square_xyz),
+};
 
-    const Shape CUBE = {
-        .indices = cube_indices.data(),
-        .indices_size = cube_indices.size(),
-        .uv = triangle_vertices.data(),
-        .uv_size = triangle_vertices.size(),
-        .xyz = triangle_vertices.data(),
-        .xyz_size = triangle_vertices.size(),
-    };
-}
+const Shape CUBE = { // Shape built from cube_indices and cube_vertices.
+    .indices = cube_indices,
+    .indices_size = ArrayCount(cube_indices),
+    .uv = cube_vertices, // was triangle_vertices: cube_indices runs up to 31, far past the 3 vertices stored in that array
+    .uv_size = ArrayCount(cube_vertices),
+    .xyz = cube_vertices, // uv and xyz share the interleaved X, Y, Z, U, V array, the same arrangement TRIANGLE uses; NOTE(review): confirm the consumer's stride handling
+    .xyz_size = ArrayCount(cube_vertices),
+};
diff --git a/src/gfx/geometry.h b/src/gfx/geometry.h
index 3bcaa21..0eb5b2d 100644
--- a/src/gfx/geometry.h
+++ b/src/gfx/geometry.h
@@ -1,20 +1,18 @@
 #ifndef LEDDA_GEOMETRY_H
 #define LEDDA_GEOMETRY_H
 
-#include <cstddef>
+#include <stddef.h>
 
-namespace LeddaGeometry {
-    struct Shape {
-        unsigned int* indices;
-        size_t indices_size;
-        float* uv;
-        size_t uv_size;
-        float* xyz;
-        size_t xyz_size;
-    };
-    extern const Shape TRIANGLE;
-    extern const Shape SQUARE;
-    extern const Shape CUBE;
-}
+struct Shape {
+    unsigned int* indices;
+    size_t indices_size;
+    float* uv;
+    size_t uv_size;
+    float* xyz;
+    size_t xyz_size;
+};
+extern const Shape TRIANGLE;
+extern const Shape SQUARE;
+extern const Shape CUBE;
 
 #endif
diff --git a/lib/c/KHR/khrplatform.h b/src/lib/KHR/khrplatform.h
similarity index 100%
rename from lib/c/KHR/khrplatform.h
rename to src/lib/KHR/khrplatform.h
diff --git a/src/lib/djstdlib/app.cpp b/src/lib/djstdlib/app.cpp
new file mode 100644
index 0000000..d1a01a3
--- /dev/null
+++ b/src/lib/djstdlib/app.cpp
@@ -0,0 +1,14 @@
+#include <stdio.h>
+#include "core.cpp"
+#include "core.h"
+
+int main(int argc, char **argv) { // Entry point: initialises the core library, allocates an arena of Megabytes(64), reads the arguments and hands a strSplit() result to a print call.
+    int statusCode = 0;
+    initialiseCore();
+    Arena *arena = arenaAlloc(Megabytes(64));
+    list<string> args = getArgs(arena, argc, argv);
+
+    prinft("%S", strSplit(arena, "-"_s, "hallo-world"_s)); // NOTE(review): `prinft` looks like a typo (printf?) - confirm which print routine is meant and that it understands "%S" with this argument
+
+    return statusCode;
+}
diff --git a/src/lib/djstdlib/core.cpp b/src/lib/djstdlib/core.cpp
new file mode 100644
index 0000000..1d85fc4
--- /dev/null
+++ b/src/lib/djstdlib/core.cpp
@@ -0,0 +1,511 @@
+#include <unistd.h> // TODO(djledda): get outta here
+#include <math.h>
+#include <string.h>
+#define STB_SPRINTF_IMPLEMENTATION
+#include "core.h"
+#include "os.cpp"
+
+// Bump-allocates `bytes` from the arena; returns 0 when the remaining capacity is too small.
+void *pushSize(Arena *arena, size_t bytes) {
+    size_t remaining = arena->capacity - arena->head;
+    if (remaining < bytes) { return 0; }
+    void *block = (char *)arena->memory + arena->head;
+    arena->head += bytes;
+    return block;
+}
+
+Arena *arenaAlloc(size_t capacity) { // one OS block: the Arena header immediately followed by `capacity` usable bytes
+    Arena *result = (Arena *)os_alloc(sizeof(Arena) + capacity);
+    result->memory = (char *)result + sizeof(Arena); // byte arithmetic; `result + sizeof(Arena)` stepped whole Arena structs
+    result->capacity = capacity;
+    result->head = 0;
+    return result;
+}
+
+void arenaFree(Arena *arena) {
+    os_free(arena, sizeof(Arena) + arena->capacity); // release the header too: arenaAlloc requested sizeof(Arena) + capacity bytes
+}
+
+void arenaFreeFrom(Arena *arena, size_t position) { // rewinds the arena to `position`; no memory is handed back to the OS
+    arena->head = position;
+}
+
+Arena *scratchArenas[2];
+
+void initialiseCore() { // gives every scratchArenas slot its own 64 MB arena
+    for (size_t slot = 0; slot < ArrayCount(scratchArenas); ++slot) {
+        scratchArenas[slot] = arenaAlloc(Megabytes(64));
+    }
+}
+
+// Opens a scratch region on the first scratch arena that does not appear in `conflicts`.
+// The returned Scratch is all zeroes (null arena) when every scratch arena conflicts.
+Scratch scratchStart(Arena **conflicts, size_t conflictCount) {
+    Scratch scratch = {0};
+    for (size_t slot = 0; slot < ArrayCount(scratchArenas); slot += 1) {
+        Arena *candidate = scratchArenas[slot];
+        size_t hit = 0;
+        while (hit < conflictCount && conflicts[hit] != candidate) {
+            hit += 1;
+        }
+        if (hit == conflictCount) { // ran off the end: nothing in `conflicts` matched
+            scratch.arena = candidate;
+            scratch.start = candidate->head;
+            break;
+        }
+    }
+    return scratch;
+}
+
+#define DeferLoop(begin_stmnt, end_stmnt) for(int __defer_i = ((begin_stmnt), 0); __defer_i < 1; (++__defer_i, (end_stmnt)))
+#define WithScratch(scratchName) Scratch scratchName; DeferLoop(scratchName = scratchStart(0, 0), scratchEnd(scratchName))
+
+void scratchEnd(Scratch scratch) {
+    if (scratch.arena) { arenaFreeFrom(scratch.arena, scratch.start); } // scratchStart returns a null arena when all scratch arenas conflict
+}
+
+// Stores `element` at index `head` and advances `head`.
+// Returns the address of the stored element, or 0 when `head` has already reached `length`.
+template <typename T>
+T *appendList(list<T> *list, T element) {
+    if (list->head >= list->length) { return 0; }
+    T *slot = &(list->data[list->head]);
+    *slot = element;
+    list->head++;
+    return slot;
+}
+
+template <typename T>
+void zeroListFull(list<T> *list) { // zeroes the first `head` elements and leaves `head` unchanged
+    memset(list->data, 0, list->head * sizeof(T)); // NOTE(review): the name suggests `length` elements may have been intended -- confirm
+}
+
+template <typename T>
+void zeroList(list<T> *list) { // clears the used elements and empties the list
+    memset(list->data, 0, list->head * sizeof(T)); // must run before the reset: with head == 0 the byte count is 0
+    list->head = 0;
+}
+
+// String literal operator: wraps a literal in a `string` view without copying it.
+// The length parameter has to be size_t; that is the only type the language accepts for a
+// string literal operator, and `unsigned long` is a different type on LLP64 targets (Windows).
+inline string operator""_s(const char *cstrLiteral, size_t length) {
+    return { (char *)cstrLiteral, length };
+}
+
+const char *cstring(Arena *arena, list<char> buf) { // arena-allocated, NUL-terminated copy of the list's bytes
+    char *arr = PushArray(arena, char, buf.length + 1); // one extra byte for the terminator
+    memmove(arr, buf.data, buf.length); // copies `length` bytes, not `head` -- NOTE(review): confirm which is intended
+    arr[buf.length] = '\0';
+    return arr;
+}
+
+const char *cstring(Arena *arena, string str) { // arena-allocated, NUL-terminated copy of `str`
+    char *copy = PushArray(arena, char, str.length + 1);
+    memmove(copy, str.str, str.length);
+    copy[str.length] = '\0';
+    return copy;
+}
+
+// Byte-wise equality of two strings.
+// Strings of different lengths are never equal; two empty strings are equal.
+bool strEql(string s1, string s2) {
+    bool equal = s1.length == s2.length;
+    size_t at = 0;
+    while (equal && at < s1.length) {
+        equal = s1.str[at] == s2.str[at];
+        at++;
+    }
+    return equal;
+}
+
+// Length of a NUL-terminated C string, not counting the terminator.
+// A null pointer is treated as length 0.
+size_t calcStringLen(const char *str) {
+    if (str == NULL) {
+        return 0;
+    }
+    const char *end = str;
+    while (*end != '\0') { end++; }
+    return (size_t)(end - str);
+}
+
+string strFromCString(Arena *arena, const char *str) { // copies a C string into the arena as a `string`
+    string result = PushString(arena, calcStringLen(str)); // length excludes the terminator; no NUL is stored in the copy
+    memcpy(result.str, str, result.length);
+    return result;
+}
+
+// Returns a copy of `str`, pushed onto `arena`, with its bytes in reverse order.
+// An empty input produces an empty result.
+string strReverse(Arena *arena, string str) {
+    string reversed = PushString(arena, str.length);
+    // Fill the output front to back while reading the input from its last byte backwards.
+    for (size_t offset = 0; offset < str.length; offset++) {
+        size_t mirrored = str.length - 1 - offset;
+        reversed.str[offset] = str.str[mirrored];
+    }
+    return reversed;
+}
+
+// Formats into an arena string in two passes (measure, then write); the second pass uses a copy of `args`.
+string strPrintfv(Arena *arena, const char *fmt, va_list args) {
+    va_list argsCopy;
+    va_copy(argsCopy, args);
+    uint64 bufSize = stb_vsnprintf(0, 0, fmt, args) + 1; // +1 for the terminator the formatter writes
+    string result = { PushArray(arena, char, bufSize), bufSize - 1 };
+    stb_vsnprintf((char *)result.str, (int)bufSize, fmt, argsCopy);
+    va_end(argsCopy); // every va_copy needs a matching va_end; it was missing
+    return result;
+}
+
+// Variadic front end for strPrintfv: formats `fmt` with its arguments into an arena string.
+string strPrintf(Arena *arena, const char *fmt, ...) {
+    va_list args;
+    va_start(args, fmt);
+    string formatted = strPrintfv(arena, fmt, args);
+    va_end(args);
+    return formatted;
+}
+
+// Non-copying view of elements [start, stop) of `l`. A `stop` of 0 means "up to head".
+// A range that ends past `head` or starts after its end yields a zeroed list.
+template <typename T>
+list<T> listSlice(list<T> l, size_t start, size_t stop) {
+    size_t end = stop == 0 ? l.head : stop;
+    bool valid = end <= l.head && start <= end;
+    if (!valid) { return {0}; }
+    size_t count = end - start;
+    list<T> view = {
+        l.data + start,
+        count, // length
+        count, // head: the view is considered full
+    };
+    return view;
+}
+
+// Non-copying view [start, stop) of `str`; stop == 0 means "to the end", invalid ranges give the zero string.
+string strSlice(string str, size_t start, size_t stop) {
+    size_t end = stop == 0 ? str.length : stop;
+    bool valid = end <= str.length && start <= end;
+    if (!valid) {
+        return {0};
+    }
+    string view = {
+        str.str + start,
+        end - start,
+    };
+    return view;
+}
+
+string strSlice(char *data, size_t start, size_t stop) { // view [start, stop) over raw character data
+    // The length of `data` is unknown here, so the header's default stop = 0 cannot mean "to the end";
+    // with stop < start the unsigned `stop - start` wrapped to a huge length, so return the zero string.
+    if (stop < start) { return {0}; }
+    return { data + start, stop - start };
+}
+
+// True when the byte `c` occurs anywhere in `str`.
+bool stringContains(string str, char c) {
+    size_t at = 0;
+    while (at < str.length && str.str[at] != c) {
+        at++;
+    }
+    return at < str.length;
+}
+
+string NUMERIC_CHARS = "0123456789"_s; // the set of characters isNumeric accepts
+inline bool isNumeric(char c) { // true when `c` is one of the ten characters in NUMERIC_CHARS
+    return stringContains(NUMERIC_CHARS, c);
+}
+
+// Splits `inputStr` at every occurrence of `splitStr`. The pieces are views into `inputStr`; only the
+// array of string headers is pushed onto `arena`. An empty input yields an empty list.
+list<string> strSplit(Arena *arena, string splitStr, string inputStr) {
+    list<string> result = {0};
+    if (inputStr.length == 0) { return result; }
+    size_t start = 0;
+    size_t c = 0;
+    // The headers are pushed back to back, so the current arena head is where the array will begin.
+    string *beginning = (string *)((char *)arena->memory + arena->head);
+    // `c + splitStr.length <= inputStr.length` cannot wrap when the separator is longer than the input
+    // and still tests a separator sitting at the very end. An empty separator never matches.
+    while (splitStr.length > 0 && c + splitStr.length <= inputStr.length) {
+        bool match = strEql(strSlice(inputStr, c, c + splitStr.length), splitStr);
+        if (!match) { c++; continue; }
+        string *piece = PushStruct(arena, string);
+        if (!piece) { return result; } // arena exhausted: hand back the pieces collected so far
+        *piece = { inputStr.str + start, c - start };
+        result = { beginning, result.length + 1, result.head + 1 };
+        c += splitStr.length; // step over the whole separator, not just its first byte
+        start = c;
+    }
+    string *last = PushStruct(arena, string);
+    if (last) {
+        *last = { inputStr.str + start, inputStr.length - start };
+        result = { beginning, result.length + 1, result.head + 1 };
+    }
+    return result;
+}
+
+// Parses the run of decimal digits at the start of `str`; returns -1 when `str` does not start with a digit.
+int8 parsePositiveInt(string str, size_t *lengthPointer) {
+    size_t numEnd = 0;
+    // Test the bound before reading: the old loop read str.str[0] of an empty string and one byte past the end.
+    while (numEnd < str.length && isNumeric(str.str[numEnd])) {
+        numEnd++;
+        *lengthPointer += 1;
+    }
+    *lengthPointer -= 1; // unchanged contract: the net advance is (digit count - 1)
+    if (numEnd == 0) {
+        return -1;
+    }
+    // NOTE(review): the value is accumulated in 8 bits, so inputs above 127 do not survive the int8 return type.
+    uint8 result = 0;
+    for (size_t i = 0; i < numEnd; i++) {
+        result = (uint8)(result * 10 + (str.str[i] - '0'));
+    }
+    return result;
+}
+
+// Parses "<digits>" or "<digits>.<digits>" at the start of `str`. Returns NAN when a '.' is present but either
+// side has no digits. Each part goes through parsePositiveInt, which also advances `*lengthPointer`.
+real32 parsePositiveReal32(string str, size_t *lengthPointer) {
+    real32 result = NAN;
+
+    // The whole part is the leading digit run; only a '.' directly after it starts a fraction.
+    // (Searching the entire string for '.' paired "12 3.5" into 12.5.)
+    size_t wholeEnd = 0;
+    while (wholeEnd < str.length && isNumeric(str.str[wholeEnd])) {
+        wholeEnd++;
+    }
+    bool split = wholeEnd < str.length && str.str[wholeEnd] == '.';
+    if (split) {
+        string wholePartStr = { str.str, wholeEnd };
+        string fractionalPartStr = { str.str + wholeEnd + 1, 0 };
+        // Count only the digits: scaling by the length of all remaining text mis-scaled inputs like "1.5abc".
+        size_t remaining = str.length - wholeEnd - 1;
+        while (fractionalPartStr.length < remaining && isNumeric(fractionalPartStr.str[fractionalPartStr.length])) {
+            fractionalPartStr.length++;
+        }
+        int wholePart = parsePositiveInt(wholePartStr, lengthPointer);
+        *lengthPointer += 1; // the '.'
+        int fractionalPart = parsePositiveInt(fractionalPartStr, lengthPointer);
+        if (wholePart >= 0 && fractionalPart >= 0) {
+            real32 fractionalPartMultiplier = 1.0f / powf(10.0f, (real32)fractionalPartStr.length);
+            result = (real32)wholePart + (real32)fractionalPart * fractionalPartMultiplier;
+        }
+    } else if (str.length > 0) {
+        result = (real32)parsePositiveInt(str, lengthPointer);
+    }
+    return result;
+}
+
+string readEntireFile(Arena *arena, string filename) { // whole file as an arena string; the zero string on any failure
+    string result = {0};
+#if OS_WINDOWS
+    HANDLE fileHandle = CreateFileA(cstring(arena, filename), GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
+    if (fileHandle != INVALID_HANDLE_VALUE) {
+        LARGE_INTEGER fileSize;
+        if (GetFileSizeEx(fileHandle, &fileSize)) {
+            string readfile = PushString(arena, (size_t)fileSize.QuadPart);
+            if (readfile.str) {
+                DWORD bytesRead;
+                if (ReadFile(fileHandle, readfile.str, (DWORD)fileSize.QuadPart, &bytesRead, NULL) && (fileSize.QuadPart == bytesRead)) {
+                    result = readfile;
+                }
+            }
+        }
+        CloseHandle(fileHandle);
+    }
+#elif OS_LINUX
+    const char *path = cstring(arena, filename); // `string` carries no NUL terminator, which fopen and stat require
+    struct stat st;
+    FILE *input = fopen(path, "r");
+    if (input && stat(path, &st) == 0) { // a missing file used to reach fread/fclose with a null FILE
+        string readBuffer = PushString(arena, (size_t)st.st_size);
+        if (readBuffer.str && fread(readBuffer.str, sizeof(byte), readBuffer.length, input) == readBuffer.length) { result = readBuffer; }
+    }
+    if (input) { fclose(input); }
+#endif
+    return result;
+}
+
+// Creates or truncates `filename` and writes `contentsLength` bytes; true only when every byte was written.
+bool writeEntireFile(Arena *arena, string filename, const byte *contents, size_t contentsLength) {
+    bool result = false;
+#if OS_WINDOWS
+    HANDLE fileHandle = CreateFileA(cstring(arena, filename), GENERIC_WRITE, FILE_SHARE_READ, NULL, CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL);
+    if (fileHandle != INVALID_HANDLE_VALUE) {
+        DWORD bytesWritten;
+        result = WriteFile(fileHandle, contents, (DWORD)contentsLength, &bytesWritten, NULL) && bytesWritten == contentsLength;
+        CloseHandle(fileHandle);
+    }
+#elif OS_LINUX
+    FILE *output = fopen(cstring(arena, filename), "wb"); // this branch used to be an Assert(false) stub
+    bool wroteAll = output && fwrite(contents, sizeof(byte), contentsLength, output) == contentsLength;
+    result = output && fclose(output) == 0 && wroteAll; // buffered data is flushed at close, so a failed close counts
+#endif
+    return result;
+}
+
+// Appends `contentsLength` bytes to `filename`, creating the file if needed; true only when every byte was written.
+bool fileAppend(Arena *arena, string filename, const byte *contents, size_t contentsLength) {
+    bool result = false;
+#if OS_WINDOWS
+    HANDLE fileHandle = CreateFileA(cstring(arena, filename), FILE_APPEND_DATA | FILE_GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL);
+    if (fileHandle != INVALID_HANDLE_VALUE) {
+        DWORD bytesWritten;
+        SetFilePointer(fileHandle, 0, NULL, FILE_END); // the returned position was stored in an unused local
+        result = WriteFile(fileHandle, contents, (DWORD)contentsLength, &bytesWritten, NULL) && bytesWritten == contentsLength;
+        CloseHandle(fileHandle);
+    }
+#elif OS_LINUX
+    FILE *output = fopen(cstring(arena, filename), "ab"); // this branch used to be an Assert(false) stub
+    bool wroteAll = output && fwrite(contents, sizeof(byte), contentsLength, output) == contentsLength;
+    result = output && fclose(output) == 0 && wroteAll; // buffered data is flushed at close, so a failed close counts
+#endif
+    return result;
+}
+
+list<string> getArgs(Arena *arena, int argc, char **argv) { // copies the command-line arguments into an arena list
+    list<string> args = PushList(arena, string, (size_t)argc); // sized for argc entries
+    for (int i = 1; i < argc; i++) { // starts at 1, so argv[0] is not copied
+        appendList(&args, strFromCString(arena, argv[i]));
+    }
+    return args;
+}
+
+// Current calendar time as reported by time(), converted to UnixTimestamp.
+UnixTimestamp getSystemUnixTime() {
+    time_t seconds = time(0);
+    return (UnixTimestamp)seconds;
+}
+
+Timestamp timestampFromUnixTime(UnixTimestamp *unixTimestamp) { // broken-down UTC time for the given timestamp
+    time_t seconds = (time_t)*unixTimestamp; // the old code passed `&time`, the address of libc's time() function
+    return *gmtime(&seconds);
+}
+
+string formatTimeHms(Arena *arena, UnixTimestamp time) { // "%T" rendering of `time` in UTC
+    local_persist const string format = "HH-MM-SS"_s;
+    string buf = PushString(arena, format.length + 1); // +1: strftime also writes a NUL, which used to land past the buffer
+    time_t seconds = (time_t)time;
+    buf.length = strftime(buf.str, buf.length, "%T", gmtime(&seconds)); // returned count excludes the NUL
+    return buf;
+}
+
+string formatTimeHms(Arena *arena, Timestamp *time) { // "%T" rendering of an already broken-down time
+    local_persist const string format = "HH-MM-SS"_s;
+    string buf = PushString(arena, format.length + 1); // +1: strftime also writes a NUL, which used to land past the buffer
+    buf.length = strftime(buf.str, buf.length, "%T", (tm *)time); // returned count excludes the NUL
+    return buf;
+}
+
+string formatTimeYmd(Arena *arena, UnixTimestamp time) { // "%Y-%m-%d" rendering of `time` in UTC
+    local_persist const string format = "YYYY-mm-dd"_s;
+    string buf = PushString(arena, format.length + 1); // +1: strftime also writes a NUL, which used to land past the buffer
+    time_t seconds = (time_t)time;
+    buf.length = strftime(buf.str, buf.length, "%Y-%m-%d", gmtime(&seconds)); // returned count excludes the NUL
+    return buf;
+}
+
+string formatTimeYmd(Arena *arena, Timestamp *time) { // "%Y-%m-%d" rendering of an already broken-down time
+    local_persist const string format = "YYYY-mm-dd"_s;
+    string buf = PushString(arena, format.length + 1); // +1: strftime also writes a NUL, which used to land past the buffer
+    buf.length = strftime(buf.str, buf.length, "%Y-%m-%d", (tm *)time); // returned count excludes the NUL
+    return buf;
+}
+
+// Formats the message into a scratch arena and writes it to the stream selected by `target`.
+function void __core_log(LogTarget target, const char *fmt, va_list argList) {
+    Scratch scratch = scratchStart(0, 0);
+    string result = strPrintfv(scratch.arena, fmt, argList);
+#if OS_WINDOWS
+    DWORD done;
+    HANDLE stdHandle;
+    switch (target) {
+        case LogTarget_stdin:
+            stdHandle = GetStdHandle(STD_INPUT_HANDLE);
+            break;
+        case LogTarget_stderr:
+            stdHandle = GetStdHandle(STD_ERROR_HANDLE); // was STD_OUTPUT_HANDLE: stdout and stderr were swapped
+            break;
+        case LogTarget_stdout:
+        default:
+            stdHandle = GetStdHandle(STD_OUTPUT_HANDLE);
+            break;
+    }
+    WriteFile(stdHandle, result.str, (DWORD)result.length, &done, 0);
+#elif OS_LINUX
+    // TODO(djledda): finish implementation without cstdlib
+    switch (target) {
+        case LogTarget_stdin:
+            write(0, (const void *)result.str, result.length);
+            break;
+        case LogTarget_stderr:
+            // Flush stdio's buffer first so earlier stdio output is emitted before the raw write.
+            fflush(stderr);
+            write(2, (const void *)result.str, result.length);
+            break;
+        case LogTarget_stdout:
+        default:
+            fflush(stdout);
+            write(1, (const void *)result.str, result.length);
+            break;
+    }
+#endif
+    scratchEnd(scratch);
+}
+
+void logErr(const char *fmt, ...) {
+    va_list argList;
+    va_start(argList, fmt);
+    __core_log(LogTarget_stdout, fmt, argList);
+    va_end(argList);
+}
+
+function void logStdout(const char *fmt, ...) { // printf-style logging with the target fixed to LogTarget_stdout
+    va_list argList;
+    va_start(argList, fmt);
+    __core_log(LogTarget_stdout, fmt, argList);
+    va_end(argList);
+}
+
+void log(const char *fmt, ...) { // public printf-style logger; forwards to __core_log with LogTarget_stdout
+    va_list argList;
+    va_start(argList, fmt);
+    __core_log(LogTarget_stdout, fmt, argList);
+    va_end(argList);
+}
+
+// Logs the list as "{ a, b, ... }" followed by its length and head.
+void log(list<int> l, LogTarget target) {
+    void (*logFn)(const char *fmt, ...) = target == LogTarget_stdout ? &logStdout : &logErr;
+    logFn("{ ");
+    // Only the first `head` slots have been appended; slots between head and length were never written.
+    for (size_t i = 0; i < l.head; i++) {
+        if (i != 0) { logFn(", "); }
+        logFn("%i", l.data[i]);
+    }
+    logFn(" } length: %zu, head: %zu\n", l.length, l.head);
+}
+
+// Logs the list as { "a", "b", ... } followed by its length and head.
+void log(list<string> l, LogTarget target) {
+    void (*logFn)(const char *fmt, ...) = target == LogTarget_stdout ? &logStdout : &logErr;
+    logFn("{ ");
+    // Only the first `head` slots hold strings; formatting an unwritten slot would follow a garbage pointer.
+    for (size_t i = 0; i < l.head; i++) {
+        if (i != 0) { logFn(", "); }
+        logFn("\"%S\"", l.data[i]);
+    }
+    logFn(" } length: %zu, head: %zu\n", l.length, l.head);
+}
+
+// Three-way comparison of the ints behind `a` and `b`: -1, 0 or 1.
+int intCompare(const void *a, const void *b) {
+    int lhs = *(int *)a, rhs = *(int *)b;
+    return lhs < rhs ? -1 : (lhs > rhs ? 1 : 0);
+}
+
diff --git a/src/lib/djstdlib/core.h b/src/lib/djstdlib/core.h
new file mode 100644
index 0000000..60e4dde
--- /dev/null
+++ b/src/lib/djstdlib/core.h
@@ -0,0 +1,219 @@
+#ifndef CORE_H
+#define CORE_H
+
+// cstdlib includes
+#include <math.h>
+#include <stdint.h> // necessary for int type sizes
+#include <stdio.h>
+#include <time.h> // TODO(djledda): try not to depend on this one
+
+// ### Misc macros ### 
+#if ENABLE_ASSERT
+#define Assert(expression) if (!(expression)) {*(volatile int *)0 = 0;}
+#else
+#define Assert(expression) 
+#endif
+
+#define function static
+#define global static 
+#define local_persist static
+
+// ### Types ###
+typedef int8_t int8;
+typedef int16_t int16;
+typedef int32_t int32;
+typedef int64_t int64;
+typedef uint8_t uint8;
+typedef uint16_t uint16;
+typedef uint32_t uint32;
+typedef uint64_t uint64;
+typedef uint8_t byte;
+typedef float real32;
+typedef double real64;
+
+// ### Sizes and Numbers ###
+#define Bytes(n) (n)
+#define Kilobytes(n) (((uint64)(n)) << 10) // argument parenthesised and widened first, so `Kilobytes(a + b)` and large counts are safe
+#define Megabytes(n) (((uint64)(n)) << 20)
+#define Gigabytes(n) (((uint64)(n)) << 30)
+#define Terabytes(n) (((uint64)(n)) << 40)
+
+#define Thousand(n) ((n)*1000)
+#define Million(n)  ((n)*1000000)
+#define Billion(n)  ((n)*1000000000LL)
+
+#define ArrayCount(arr) (sizeof(arr) / sizeof((arr)[0]))
+
+// ### Arenas ###
+struct Arena { // bump allocator state; pushSize hands out memory + head and advances head
+    void *memory; // start of the usable region
+    size_t capacity; // usable bytes in `memory`
+    size_t head; // bytes handed out so far
+};
+
+struct Scratch { // temporary region on an arena; scratchEnd rewinds the arena to `start`
+    Arena *arena;
+    size_t start; // value of arena->head when the scratch region was opened
+};
+
+void *pushSize(Arena *arena, size_t bytes);
+Arena *arenaAlloc(size_t capacity);
+void arenaFree(Arena *arena);
+void arenaFreeFrom(Arena *arena, size_t pos);
+
+void initialiseCore();
+
+Scratch scratchStart(Arena **conflicts, size_t conflictCount); 
+void scratchEnd(Scratch scratch); 
+
+#define PushArray(arena, type, size) (type *)pushSize(arena, sizeof(type) * (size))
+#define PushStruct(arena, type) (type *)pushSize(arena, sizeof(type))
+
+// ### Vectors ###
+template <typename T>
+union Vector2 { // two components, addressable by name (x, y) or by index through `vec`
+    struct {
+        T x;
+        T y;
+    };
+    T vec[2];
+};
+template <typename T>
+inline function Vector2<T> vec2(T x, T y) { // builds a Vector2 from its components
+    Vector2<T> result = {0};
+    result.x = x;
+    result.y = y;
+    return result;
+}
+
+template <typename T>
+union Vector3 { // three components, addressable by name (x, y, z) or by index through `vec`
+    struct {
+        T x;
+        T y;
+        T z;
+    };
+    T vec[3];
+};
+template <typename T>
+inline function Vector3<T> vec3(T x, T y, T z) { // builds a Vector3 from its components
+    Vector3<T> result = {0};
+    result.x = x;
+    result.y = y;
+    result.z = z;
+    return result;
+}
+
+template <typename T>
+union Vector4 { // four components, addressable by name (x, y, z, w) or by index through `vec`
+    struct {
+        T x;
+        T y;
+        T z;
+        T w;
+    };
+    T vec[4];
+};
+template <typename T>
+inline function Vector4<T> vec4(T x, T y, T z, T w) { // builds a Vector4 from its components
+    Vector4<T> result = {0};
+    result.x = x;
+    result.y = y;
+    result.z = z;
+    result.w = w;
+    return result;
+}
+
+// ### Lists ###
+template <typename T>
+struct list { // fixed-capacity array view; appendList writes at `head` and refuses once head reaches length
+    T* data;
+    size_t length; // capacity in elements
+    size_t head; // number of elements appended so far
+};
+
+#define PushList(arena, type, size) (list<type>{ PushArray(arena, type, size), size, 0 })
+#define PushFullList(arena, type, size) (list<type>{ PushArray(arena, type, size), size, size })
+
+template <typename T> T *appendList(list<T> *list, T element); 
+template <typename T> void zeroList(list<T> *list); 
+template <typename T> void zeroListFull(list<T> *list); 
+template <typename T> list<T> listSlice(list<T> l, size_t start, size_t stop = 0); 
+
+// ### Strings ###
+struct string {
+    char *str;
+    size_t length;
+};
+#define STB_SPRINTF_DECORATE(name) stb_##name // define this before including if you want to change the names
+#include "vendor/stb_sprintf.h"
+
+#define strlit(lit) (string{(char *)(lit), sizeof(lit) - 1})
+#define PushString(arena, length) (string{ (char *)pushSize(arena, length), (length) })
+string operator""_s(const char *cstrLiteral, size_t length); // a string literal operator must take its length as size_t
+
+// C Strings
+const char *cstring(Arena *arena, list<char> buf); 
+const char *cstring(Arena *arena, string str); 
+size_t calcStringLen(const char *str); 
+string strFromCString(Arena *arena, const char *str); 
+
+bool strEql(string s1, string s2); 
+bool stringContains(string str, char c); 
+
+string strReverse(Arena *arena, string str); 
+string strSlice(string str, size_t start, size_t stop = 0); 
+string strSlice(char *data, size_t start, size_t stop = 0); 
+list<string> strSplit(Arena *arena, string splitStr, string inputStr); 
+string strPrintfv(Arena *arena, const char *fmt, va_list args);
+string strPrintf(Arena *arena, const char *fmt, ...);
+
+int8 parsePositiveInt(string str, size_t *lengthPointer); 
+real32 parsePositiveReal32(string str, size_t *lengthPointer); // matches the definition in core.cpp, which takes no Arena
+
+inline function bool isNumeric(char c); 
+
+// ### File IO ###
+string readEntireFile(Arena *arena, string filename); 
+bool writeEntireFile(Arena *arena, string filename, const byte *contents, size_t contentsLength); 
+bool fileAppend(Arena *arena, string filename, const byte *contents, size_t contentsLength); 
+
+// ### Cmdline ###
+list<string> getArgs(Arena *arena, int argc, char **argv); 
+
+// ### Time ###
+typedef uint64 UnixTimestamp;
+typedef tm Timestamp;
+
+UnixTimestamp getSystemUnixTime(); 
+Timestamp timestampFromUnixTime(UnixTimestamp *unixTimestamp); 
+string formatTimeHms(Arena *arena, UnixTimestamp time); 
+string formatTimeHms(Arena *arena, Timestamp *time); 
+string formatTimeYmd(Arena *arena, UnixTimestamp time); 
+string formatTimeYmd(Arena *arena, Timestamp *time); 
+
+// ### Linked Lists ###
+// TODO(djledda): implement basic linked lists (based on arenas?)
+
+// ### Logging ###
+enum LogTarget {
+    LogTarget_stdout,
+    LogTarget_stdin,
+    LogTarget_stderr,
+    LogTarget_count,
+};
+
+void log(list<int> l, LogTarget target = LogTarget_stdout);
+void log(list<string> l, LogTarget target = LogTarget_stdout);
+void log(const char *fmt, ...);
+void logErr(const char *fmt, ...); // core.cpp defines and uses `logErr`; the old spelling `logError` matched no definition there
+
+// ### Loops ###
+#define EachIn(list, it) size_t it = 0; it < list.length; it++
+#define EachInReversed(list, it) size_t it = list.length - 1; it >= 0 && it < list.length; it--
+#define EachInArray(arr, it)  size_t it = 0; it < ArrayCount(arr); ++it
+
+// ### Misc ###
+int intCompare(const void *a, const void *b); 
+
+#endif
diff --git a/src/lib/djstdlib/os.cpp b/src/lib/djstdlib/os.cpp
new file mode 100644
index 0000000..64a2575
--- /dev/null
+++ b/src/lib/djstdlib/os.cpp
@@ -0,0 +1,12 @@
+#ifndef OS_CPP
+#define OS_CPP
+
+#if OS_WINDOWS
+#include "os_win32.cpp"
+#elif OS_LINUX
+#include "os_linux.cpp"
+#else 
+    #error Development environment not supported.
+#endif
+
+#endif
diff --git a/src/lib/djstdlib/os.h b/src/lib/djstdlib/os.h
new file mode 100644
index 0000000..44aac28
--- /dev/null
+++ b/src/lib/djstdlib/os.h
@@ -0,0 +1,12 @@
+#ifndef OS_H
+#define OS_H
+
+#include "core.h"
+
+// ### Memory ###
+void *os_alloc(size_t capacity);
+void os_reserve(void *ptr);
+void os_decommit(void *ptr);
+void os_free(void *ptr, size_t freeSize);
+
+#endif
diff --git a/src/lib/djstdlib/os_linux.cpp b/src/lib/djstdlib/os_linux.cpp
new file mode 100644
index 0000000..122a4bb
--- /dev/null
+++ b/src/lib/djstdlib/os_linux.cpp
@@ -0,0 +1,24 @@
+#ifndef OS_IMPL_LINUX_CPP
+#define OS_IMPL_LINUX_CPP
+
+#include "os.h"
+
+#include <sys/mman.h>
+#include <sys/stat.h>
+
+void *os_alloc(size_t capacity) {
+    void *block = mmap(0, capacity, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+    return block == MAP_FAILED ? 0 : block; // mmap reports failure as MAP_FAILED ((void *)-1), not as a null pointer
+}
+
+void os_reserve(void *ptr) { // renamed from os_commit: os.h declares os_reserve, as os_win32.cpp defines; still a stub
+}
+
+void os_decommit(void *ptr) {} // stub: does nothing yet
+
+void os_free(void *ptr, size_t size) { // unmaps `size` bytes starting at `ptr`
+    int err = munmap(ptr, size);
+    Assert(err != -1); // munmap returns -1 on failure; only checked when ENABLE_ASSERT is set
+}
+
+#endif
diff --git a/src/lib/djstdlib/os_win32.cpp b/src/lib/djstdlib/os_win32.cpp
new file mode 100644
index 0000000..01eb4ec
--- /dev/null
+++ b/src/lib/djstdlib/os_win32.cpp
@@ -0,0 +1,21 @@
+#ifndef OS_IMPL_WIN32_CPP
+#define OS_IMPL_WIN32_CPP
+
+#include "os.h"
+#include "Windows.h" 
+
+void *os_alloc(size_t commitSize) { // reserves and commits `commitSize` bytes of read/write memory in one call
+    return VirtualAlloc(NULL, commitSize, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE);
+}
+
+void os_reserve(void *ptr) {
+}
+
+void os_decommit(void *ptr) {
+}
+
+void os_free(void *ptr, size_t size) { // `size` is unused: MEM_RELEASE frees the whole original allocation
+    VirtualFree(ptr, 0, MEM_RELEASE); // dwSize is a SIZE_T that must be 0 with MEM_RELEASE; NULL is a pointer constant
+}
+
+#endif 
diff --git a/src/lib/djstdlib/vendor/stb_sprintf.h b/src/lib/djstdlib/vendor/stb_sprintf.h
new file mode 100644
index 0000000..6c2fd25
--- /dev/null
+++ b/src/lib/djstdlib/vendor/stb_sprintf.h
@@ -0,0 +1,1923 @@
+// NOTE(djledda): This library has been modified to support my string struct, inspired by the Digital Grove codebase by Ryan Fleury.
+
+// stb_sprintf - v1.10 - public domain snprintf() implementation
+// originally by Jeff Roberts / RAD Game Tools, 2015/10/20
+// http://github.com/nothings/stb
+//
+// allowed types:  sc uidBboXx p AaGgEef n
+// lengths      :  hh h ll j z t I64 I32 I
+//
+// Contributors:
+//    Fabian "ryg" Giesen (reformatting)
+//    github:aganm (attribute format)
+//
+// Contributors (bugfixes):
+//    github:d26435
+//    github:trex78
+//    github:account-login
+//    Jari Komppa (SI suffixes)
+//    Rohit Nirmal
+//    Marcin Wojdyr
+//    Leonard Ritter
+//    Stefano Zanotti
+//    Adam Allison
+//    Arvid Gerstmann
+//    Markus Kolb
+//
+// LICENSE:
+//
+//   See end of file for license information.
+
+#ifndef STB_SPRINTF_H_INCLUDE
+#define STB_SPRINTF_H_INCLUDE
+
+/*
+Single file sprintf replacement.
+
+Originally written by Jeff Roberts at RAD Game Tools - 2015/10/20.
+Hereby placed in public domain.
+
+This is a full sprintf replacement that supports everything that
+the C runtime sprintfs support, including float/double, 64-bit integers,
+hex floats, field parameters (%*.*d stuff), length reads backs, etc.
+
+Why would you need this if sprintf already exists?  Well, first off,
+it's *much* faster (see below). It's also much smaller than the CRT
+versions code-space-wise. We've also added some simple improvements
+that are super handy (commas in thousands, callbacks at buffer full,
+for example). Finally, the format strings for MSVC and GCC differ
+for 64-bit integers (among other small things), so this lets you use
+the same format strings in cross platform code.
+
+It uses the standard single file trick of being both the header file
+and the source itself. If you just include it normally, you just get
+the header file function definitions. To get the code, you include
+it from a C or C++ file and define STB_SPRINTF_IMPLEMENTATION first.
+
+It only uses va_args macros from the C runtime to do it's work. It
+does cast doubles to S64s and shifts and divides U64s, which does
+drag in CRT code on most platforms.
+
+It compiles to roughly 8K with float support, and 4K without.
+As a comparison, when using MSVC static libs, calling sprintf drags
+in 16K.
+
+API:
+====
+int stbsp_sprintf( char * buf, char const * fmt, ... )
+int stbsp_snprintf( char * buf, int count, char const * fmt, ... )
+  Convert an arg list into a buffer.  stbsp_snprintf always returns
+  a zero-terminated string (unlike regular snprintf).
+
+int stbsp_vsprintf( char * buf, char const * fmt, va_list va )
+int stbsp_vsnprintf( char * buf, int count, char const * fmt, va_list va )
+  Convert a va_list arg list into a buffer.  stbsp_vsnprintf always returns
+  a zero-terminated string (unlike regular snprintf).
+
+int stbsp_vsprintfcb( STBSP_SPRINTFCB * callback, void * user, char * buf, char const * fmt, va_list va )
+    typedef char * STBSP_SPRINTFCB( char const * buf, void * user, int len );
+  Convert into a buffer, calling back every STB_SPRINTF_MIN chars.
+  Your callback can then copy the chars out, print them or whatever.
+  This function is actually the workhorse for everything else.
+  The buffer you pass in must hold at least STB_SPRINTF_MIN characters.
+    // you return the next buffer to use or 0 to stop converting
+
+void stbsp_set_separators( char comma, char period )
+  Set the comma and period characters to use.
+
+FLOATS/DOUBLES:
+===============
+This code uses a internal float->ascii conversion method that uses
+doubles with error correction (double-doubles, for ~105 bits of
+precision).  This conversion is round-trip perfect - that is, an atof
+of the values output here will give you the bit-exact double back.
+
+One difference is that our insignificant digits will be different than
+with MSVC or GCC (but they don't match each other either).  We also
+don't attempt to find the minimum length matching float (pre-MSVC15
+doesn't either).
+
+If you don't need float or doubles at all, define STB_SPRINTF_NOFLOAT
+and you'll save 4K of code space.
+
+64-BIT INTS:
+============
+This library also supports 64-bit integers and you can use MSVC style or
+GCC style indicators (%I64d or %lld).  It supports the C99 specifiers
+for size_t and ptr_diff_t (%jd %zd) as well.
+
+EXTRAS:
+=======
+Like some GCCs, for integers and floats, you can use a ' (single quote)
+specifier and commas will be inserted on the thousands: "%'d" on 12345
+would print 12,345.
+
+For integers and floats, you can use a "$" specifier and the number
+will be converted to float and then divided to get kilo, mega, giga or
+tera and then printed, so "%$d" 1000 is "1.0 k", "%$.2d" 2536000 is
+"2.53 M", etc. For byte values, use two $:s, like "%$$d" to turn
+2536000 to "2.42 Mi". If you prefer JEDEC suffixes to SI ones, use three
+$:s: "%$$$d" -> "2.42 M". To remove the space between the number and the
+suffix, add "_" specifier: "%_$d" -> "2.53M".
+
+In addition to octal and hexadecimal conversions, you can print
+integers in binary: "%b" for 256 would print 100.
+
+PERFORMANCE vs MSVC 2008 32-/64-bit (GCC is even slower than MSVC):
+===================================================================
+"%d" across all 32-bit ints (4.8x/4.0x faster than 32-/64-bit MSVC)
+"%24d" across all 32-bit ints (4.5x/4.2x faster)
+"%x" across all 32-bit ints (4.5x/3.8x faster)
+"%08x" across all 32-bit ints (4.3x/3.8x faster)
+"%f" across e-10 to e+10 floats (7.3x/6.0x faster)
+"%e" across e-10 to e+10 floats (8.1x/6.0x faster)
+"%g" across e-10 to e+10 floats (10.0x/7.1x faster)
+"%f" for values near e-300 (7.9x/6.5x faster)
+"%f" for values near e+300 (10.0x/9.1x faster)
+"%e" for values near e-300 (10.1x/7.0x faster)
+"%e" for values near e+300 (9.2x/6.0x faster)
+"%.320f" for values near e-300 (12.6x/11.2x faster)
+"%a" for random values (8.6x/4.3x faster)
+"%I64d" for 64-bits with 32-bit values (4.8x/3.4x faster)
+"%I64d" for 64-bits > 32-bit values (4.9x/5.5x faster)
+"%s%s%s" for 64 char strings (7.1x/7.3x faster)
+"...512 char string..." ( 35.0x/32.5x faster!)
+*/
+
+#if defined(__clang__)
+ #if defined(__has_feature) && defined(__has_attribute)
+  #if __has_feature(address_sanitizer)
+   #if __has_attribute(__no_sanitize__)
+    #define STBSP__ASAN __attribute__((__no_sanitize__("address")))
+   #elif __has_attribute(__no_sanitize_address__)
+    #define STBSP__ASAN __attribute__((__no_sanitize_address__))
+   #elif __has_attribute(__no_address_safety_analysis__)
+    #define STBSP__ASAN __attribute__((__no_address_safety_analysis__))
+   #endif
+  #endif
+ #endif
+#elif defined(__GNUC__) && (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8))
+ #if defined(__SANITIZE_ADDRESS__) && __SANITIZE_ADDRESS__
+  #define STBSP__ASAN __attribute__((__no_sanitize_address__))
+ #endif
+#endif
+
+#ifndef STBSP__ASAN
+#define STBSP__ASAN
+#endif
+
+#ifdef STB_SPRINTF_STATIC
+#define STBSP__PUBLICDEC static
+#define STBSP__PUBLICDEF static STBSP__ASAN
+#else
+#ifdef __cplusplus
+#define STBSP__PUBLICDEC extern "C"
+#define STBSP__PUBLICDEF extern "C" STBSP__ASAN
+#else
+#define STBSP__PUBLICDEC extern
+#define STBSP__PUBLICDEF STBSP__ASAN
+#endif
+#endif
+
+#if defined(__has_attribute)
+ #if __has_attribute(format)
+   #define STBSP__ATTRIBUTE_FORMAT(fmt,va) __attribute__((format(printf,fmt,va)))
+ #endif
+#endif
+
+#ifndef STBSP__ATTRIBUTE_FORMAT
+#define STBSP__ATTRIBUTE_FORMAT(fmt,va)
+#endif
+
+#ifdef _MSC_VER
+#define STBSP__NOTUSED(v)  (void)(v)
+#else
+#define STBSP__NOTUSED(v)  (void)sizeof(v)
+#endif
+
+#include <stdarg.h> // for va_arg(), va_list()
+#include <stddef.h> // size_t, ptrdiff_t
+
+#ifndef STB_SPRINTF_MIN
+#define STB_SPRINTF_MIN 512 // how many characters per callback (overridable: define it before including)
+#endif
+typedef char *STBSP_SPRINTFCB(const char *buf, void *user, int len); // receives len formatted chars in buf plus the caller's user pointer; returns the next buffer to fill, or 0 to stop formatting
+
+#ifndef STB_SPRINTF_DECORATE
+#define STB_SPRINTF_DECORATE(name) stbsp_##name // define this before including if you want to change the names
+#endif
+
+STBSP__PUBLICDEC int STB_SPRINTF_DECORATE(vsprintf)(char *buf, char const *fmt, va_list va);
+STBSP__PUBLICDEC int STB_SPRINTF_DECORATE(vsnprintf)(char *buf, int count, char const *fmt, va_list va);
+STBSP__PUBLICDEC int STB_SPRINTF_DECORATE(sprintf)(char *buf, char const *fmt, ...) STBSP__ATTRIBUTE_FORMAT(2,3); // NOTE(review): printf-style format checking may flag the custom %S (string struct) specifier handled in vsprintfcb - confirm
+STBSP__PUBLICDEC int STB_SPRINTF_DECORATE(snprintf)(char *buf, int count, char const *fmt, ...) STBSP__ATTRIBUTE_FORMAT(3,4);
+
+STBSP__PUBLICDEC int STB_SPRINTF_DECORATE(vsprintfcb)(STBSP_SPRINTFCB *callback, void *user, char *buf, char const *fmt, va_list va); // formatting core; its buffer-check macro only flushes through callback when callback is non-null
+STBSP__PUBLICDEC void STB_SPRINTF_DECORATE(set_separators)(char comma, char period); // replaces the digit-group separator and decimal-point characters
+
+#endif // STB_SPRINTF_H_INCLUDE
+
+#ifdef STB_SPRINTF_IMPLEMENTATION
+
+#define stbsp__uint32 unsigned int // fixed-width integer aliases are macros, not typedefs
+#define stbsp__int32 signed int
+
+#ifdef _MSC_VER // MSVC spelling of the 64-bit integer types
+#define stbsp__uint64 unsigned __int64
+#define stbsp__int64 signed __int64
+#else
+#define stbsp__uint64 unsigned long long
+#define stbsp__int64 signed long long
+#endif
+#define stbsp__uint16 unsigned short
+
+#ifndef stbsp__uintptr // may be pre-defined by the includer; otherwise chosen from known 64-bit target macros
+#if defined(__ppc64__) || defined(__powerpc64__) || defined(__aarch64__) || defined(_M_X64) || defined(__x86_64__) || defined(__x86_64) || defined(__s390x__)
+#define stbsp__uintptr stbsp__uint64
+#else // any target not listed above gets the 32-bit type; the visible uses only test the low two bits of a pointer
+#define stbsp__uintptr stbsp__uint32
+#endif
+#endif
+
+#ifndef STB_SPRINTF_MSVC_MODE // used for MSVC2013 and earlier (MSVC2015 matches GCC)
+#if defined(_MSC_VER) && (_MSC_VER < 1900)
+#define STB_SPRINTF_MSVC_MODE
+#endif
+#endif
+
+#ifdef STB_SPRINTF_NOUNALIGNED // define this before inclusion to force stbsp_sprintf to always use aligned accesses
+#define STBSP__UNALIGNED(code) // drops the wrapped code (the unaligned fast path)
+#else
+#define STBSP__UNALIGNED(code) code
+#endif
+
+#ifndef STB_SPRINTF_NOFLOAT // float support can be compiled out by defining STB_SPRINTF_NOFLOAT
+// internal float utility functions (forward declarations; the definitions are not in this part of the file)
+static stbsp__int32 stbsp__real_to_str(char const **start, stbsp__uint32 *len, char *out, stbsp__int32 *decimal_pos, double value, stbsp__uint32 frac_digits);
+static stbsp__int32 stbsp__real_to_parts(stbsp__int64 *bits, stbsp__int32 *expo, double value);
+#define STBSP__SPECIAL 0x7000 // decimal_pos sentinel: the formatter then copies the returned text verbatim (presumably NaN/Inf - confirm)
+#endif
+
+static char stbsp__period = '.'; // decimal-point character; changed via set_separators
+static char stbsp__comma = ','; // digit-group separator; changed via set_separators
+static struct
+{
+   short temp; // force next field to be 2-byte aligned
+   char pair[201]; // "00".."99" as 100 two-character entries plus the terminating NUL; read at index (n % 100) * 2
+} stbsp__digitpair =
+{
+  0,
+   "00010203040506070809101112131415161718192021222324"
+   "25262728293031323334353637383940414243444546474849"
+   "50515253545556575859606162636465666768697071727374"
+   "75767778798081828384858687888990919293949596979899"
+};
+
+STBSP__PUBLICDEF void STB_SPRINTF_DECORATE(set_separators)(char pcomma, char pperiod)
+{ // install the characters used for digit grouping and for the decimal point
+   stbsp__comma = pcomma;   // emitted between digit groups when the grouping flag is active
+   stbsp__period = pperiod; // emitted between the integer and fractional digits
+}
+
+#define STBSP__LEFTJUST 1 // '-' flag; these are bit flags kept in fl (the float path additionally counts metric steps in fl >> 24)
+#define STBSP__LEADINGPLUS 2 // '+' flag
+#define STBSP__LEADINGSPACE 4 // ' ' flag
+#define STBSP__LEADING_0X 8 // '#' flag
+#define STBSP__LEADINGZERO 16 // '0' flag
+#define STBSP__INTMAX 32 // the integer argument is fetched as a 64-bit value
+#define STBSP__TRIPLET_COMMA 64 // apostrophe flag: insert stbsp__comma between digit groups
+#define STBSP__NEGATIVE 128 // set by the formatter itself when the value is negative
+#define STBSP__METRIC_SUFFIX 256 // first '$' flag: scale the value and append a metric suffix
+#define STBSP__HALFWIDTH 512 // 'h' size modifier
+#define STBSP__METRIC_NOSPACE 1024 // '_' flag: no space between the number and the metric suffix
+#define STBSP__METRIC_1024 2048 // second '$': scale by 1024 instead of 1000
+#define STBSP__METRIC_JEDEC 4096 // third '$': omit the 'i' that the 1024 mode otherwise appends
+
+static void stbsp__lead_sign(stbsp__uint32 fl, char *sign)
+{
+   // sign[0] receives the prefix length (0 or 1); sign[1] receives the prefix character, if any.
+   // Precedence: '-' for a negative value, then ' ' (space flag), then '+' (plus flag).
+   char prefix = 0;
+   if (fl & STBSP__NEGATIVE)
+      prefix = '-';
+   else if (fl & STBSP__LEADINGSPACE)
+      prefix = ' ';
+   else if (fl & STBSP__LEADINGPLUS)
+      prefix = '+';
+   sign[0] = (char)(prefix != 0);
+   if (prefix) sign[1] = prefix; // sign[1] is left untouched when there is no prefix
+}
+
+static STBSP__ASAN stbsp__uint32 stbsp__strlen_limited(char const *s, stbsp__uint32 limit)
+{ // returns the number of characters before the terminating 0 in s, counting at most `limit` characters
+   char const * sn = s; // scan cursor; the result is always sn - s
+
+   // get up to 4-byte alignment
+   for (;;) {
+      if (((stbsp__uintptr)sn & 3) == 0)
+         break;
+
+      if (!limit || *sn == 0) // budget exhausted or terminator found before reaching alignment
+         return (stbsp__uint32)(sn - s);
+
+      ++sn;
+      --limit;
+   }
+
+   // scan over 4 bytes at a time to find terminating 0
+   // this will intentionally scan up to 3 bytes past the end of buffers,
+   // but because it works 4B aligned, it will never cross page boundaries
+   // (hence the STBSP__ASAN markup; the over-read here is intentional
+   // and harmless)
+   while (limit >= 4) {
+      stbsp__uint32 v = *(stbsp__uint32 *)sn;
+      // bit hack to find if there's a 0 byte in there
+      if ((v - 0x01010101) & (~v) & 0x80808080UL)
+         break; // some byte of this word is 0; the byte loop below locates it
+
+      sn += 4;
+      limit -= 4;
+   }
+
+   // handle the last few characters to find actual size
+   while (limit && *sn) {
+      ++sn;
+      --limit;
+   }
+
+   return (stbsp__uint32)(sn - s);
+}
+
+STBSP__PUBLICDEF int STB_SPRINTF_DECORATE(vsprintfcb)(STBSP_SPRINTFCB *callback, void *user, char *buf, char const *fmt, va_list va)
+{
+   static char hex[] = "0123456789abcdefxp";
+   static char hexu[] = "0123456789ABCDEFXP";
+   char *bf;
+   char const *f;
+   int tlen = 0;
+
+   bf = buf;
+   f = fmt;
+   for (;;) {
+      stbsp__int32 fw, pr, tz;
+      stbsp__uint32 fl;
+
+      // macros for the callback buffer stuff
+      #define stbsp__chk_cb_bufL(bytes)                        \
+         {                                                     \
+            int len = (int)(bf - buf);                         \
+            if ((len + (bytes)) >= STB_SPRINTF_MIN) {          \
+               tlen += len;                                    \
+               if (0 == (bf = buf = callback(buf, user, len))) \
+                  goto done;                                   \
+            }                                                  \
+         }
+      #define stbsp__chk_cb_buf(bytes)    \
+         {                                \
+            if (callback) {               \
+               stbsp__chk_cb_bufL(bytes); \
+            }                             \
+         }
+      #define stbsp__flush_cb()                      \
+         {                                           \
+            stbsp__chk_cb_bufL(STB_SPRINTF_MIN - 1); \
+         } // flush if there is even one byte in the buffer
+      #define stbsp__cb_buf_clamp(cl, v)                \
+         cl = v;                                        \
+         if (callback) {                                \
+            int lg = STB_SPRINTF_MIN - (int)(bf - buf); \
+            if (cl > lg)                                \
+               cl = lg;                                 \
+         }
+
+      // fast copy everything up to the next % (or end of string)
+      for (;;) {
+         while (((stbsp__uintptr)f) & 3) {
+         schk1:
+            if (f[0] == '%')
+               goto scandd;
+         schk2:
+            if (f[0] == 0)
+               goto endfmt;
+            stbsp__chk_cb_buf(1);
+            *bf++ = f[0];
+            ++f;
+         }
+         for (;;) {
+            // Check if the next 4 bytes contain %(0x25) or end of string.
+            // Using the 'hasless' trick:
+            // https://graphics.stanford.edu/~seander/bithacks.html#HasLessInWord
+            stbsp__uint32 v, c;
+            v = *(stbsp__uint32 *)f;
+            c = (~v) & 0x80808080;
+            if (((v ^ 0x25252525) - 0x01010101) & c)
+               goto schk1;
+            if ((v - 0x01010101) & c)
+               goto schk2;
+            if (callback)
+               if ((STB_SPRINTF_MIN - (int)(bf - buf)) < 4)
+                  goto schk1;
+            #ifdef STB_SPRINTF_NOUNALIGNED
+                if(((stbsp__uintptr)bf) & 3) {
+                    bf[0] = f[0];
+                    bf[1] = f[1];
+                    bf[2] = f[2];
+                    bf[3] = f[3];
+                } else
+            #endif
+            {
+                *(stbsp__uint32 *)bf = v;
+            }
+            bf += 4;
+            f += 4;
+         }
+      }
+   scandd:
+
+      ++f;
+
+      // ok, we have a percent, read the modifiers first
+      fw = 0;
+      pr = -1;
+      fl = 0;
+      tz = 0;
+
+      // flags
+      for (;;) {
+         switch (f[0]) {
+         // if we have left justify
+         case '-':
+            fl |= STBSP__LEFTJUST;
+            ++f;
+            continue;
+         // if we have leading plus
+         case '+':
+            fl |= STBSP__LEADINGPLUS;
+            ++f;
+            continue;
+         // if we have leading space
+         case ' ':
+            fl |= STBSP__LEADINGSPACE;
+            ++f;
+            continue;
+         // if we have leading 0x
+         case '#':
+            fl |= STBSP__LEADING_0X;
+            ++f;
+            continue;
+         // if we have thousand commas
+         case '\'':
+            fl |= STBSP__TRIPLET_COMMA;
+            ++f;
+            continue;
+         // if we have kilo marker (none->kilo->kibi->jedec)
+         case '$':
+            if (fl & STBSP__METRIC_SUFFIX) {
+               if (fl & STBSP__METRIC_1024) {
+                  fl |= STBSP__METRIC_JEDEC;
+               } else {
+                  fl |= STBSP__METRIC_1024;
+               }
+            } else {
+               fl |= STBSP__METRIC_SUFFIX;
+            }
+            ++f;
+            continue;
+         // if we don't want space between metric suffix and number
+         case '_':
+            fl |= STBSP__METRIC_NOSPACE;
+            ++f;
+            continue;
+         // if we have leading zero
+         case '0':
+            fl |= STBSP__LEADINGZERO;
+            ++f;
+            goto flags_done;
+         default: goto flags_done;
+         }
+      }
+   flags_done:
+
+      // get the field width
+      if (f[0] == '*') {
+         fw = va_arg(va, stbsp__uint32);
+         ++f;
+      } else {
+         while ((f[0] >= '0') && (f[0] <= '9')) {
+            fw = fw * 10 + f[0] - '0';
+            f++;
+         }
+      }
+      // get the precision
+      if (f[0] == '.') {
+         ++f;
+         if (f[0] == '*') {
+            pr = va_arg(va, stbsp__uint32);
+            ++f;
+         } else {
+            pr = 0;
+            while ((f[0] >= '0') && (f[0] <= '9')) {
+               pr = pr * 10 + f[0] - '0';
+               f++;
+            }
+         }
+      }
+
+      // handle integer size overrides
+      switch (f[0]) {
+      // are we halfwidth?
+      case 'h':
+         fl |= STBSP__HALFWIDTH;
+         ++f;
+         if (f[0] == 'h')
+            ++f;  // QUARTERWIDTH
+         break;
+      // are we 64-bit (unix style)
+      case 'l':
+         fl |= ((sizeof(long) == 8) ? STBSP__INTMAX : 0);
+         ++f;
+         if (f[0] == 'l') {
+            fl |= STBSP__INTMAX;
+            ++f;
+         }
+         break;
+      // are we 64-bit on intmax? (c99)
+      case 'j':
+         fl |= (sizeof(size_t) == 8) ? STBSP__INTMAX : 0;
+         ++f;
+         break;
+      // are we 64-bit on size_t or ptrdiff_t? (c99)
+      case 'z':
+         fl |= (sizeof(ptrdiff_t) == 8) ? STBSP__INTMAX : 0;
+         ++f;
+         break;
+      case 't':
+         fl |= (sizeof(ptrdiff_t) == 8) ? STBSP__INTMAX : 0;
+         ++f;
+         break;
+      // are we 64-bit (msft style)
+      case 'I':
+         if ((f[1] == '6') && (f[2] == '4')) {
+            fl |= STBSP__INTMAX;
+            f += 3;
+         } else if ((f[1] == '3') && (f[2] == '2')) {
+            f += 3;
+         } else {
+            fl |= ((sizeof(void *) == 8) ? STBSP__INTMAX : 0);
+            ++f;
+         }
+         break;
+      default: break;
+      }
+
+      // handle each replacement
+      switch (f[0]) {
+         #define STBSP__NUMSZ 512 // big enough for e308 (with commas) or e-307
+         char num[STBSP__NUMSZ];
+         char lead[8];
+         char tail[8];
+         char *s;
+         char const *h;
+         stbsp__uint32 l, n, cs;
+         stbsp__uint64 n64;
+#ifndef STB_SPRINTF_NOFLOAT
+         double fv;
+#endif
+         stbsp__int32 dp;
+         char const *sn;
+
+      case 's':
+         // get the string
+         s = va_arg(va, char *);
+         if (s == 0)
+            s = (char *)"null";
+         // get the length, limited to desired precision
+         // always limit to ~0u chars since our counts are 32b
+         l = stbsp__strlen_limited(s, (pr >= 0) ? pr : ~0u);
+         lead[0] = 0;
+         tail[0] = 0;
+         pr = 0;
+         dp = 0;
+         cs = 0;
+         // copy the string in
+         goto scopy;
+
+       case 'S': 
+       {
+         // string struct
+         string str = va_arg(va, string);
+         s = (char *)str.str;
+         sn = (const char *)(str.str + str.length);
+         l = (unsigned int)str.length;
+         lead[0] = 0;
+         tail[0] = 0;
+         pr = 0;
+         dp = 0;
+         cs = 0;
+         goto scopy;
+      } break;
+
+      case 'c': // char
+         // get the character
+         s = num + STBSP__NUMSZ - 1;
+         *s = (char)va_arg(va, int);
+         l = 1;
+         lead[0] = 0;
+         tail[0] = 0;
+         pr = 0;
+         dp = 0;
+         cs = 0;
+         goto scopy;
+
+      case 'n': // weird write-bytes specifier
+      {
+         int *d = va_arg(va, int *);
+         *d = tlen + (int)(bf - buf);
+      } break;
+
+#ifdef STB_SPRINTF_NOFLOAT
+      case 'A':              // float
+      case 'a':              // hex float
+      case 'G':              // float
+      case 'g':              // float
+      case 'E':              // float
+      case 'e':              // float
+      case 'f':              // float
+         va_arg(va, double); // eat it
+         s = (char *)"No float";
+         l = 8;
+         lead[0] = 0;
+         tail[0] = 0;
+         pr = 0;
+         cs = 0;
+         STBSP__NOTUSED(dp);
+         goto scopy;
+#else
+      case 'A': // hex float
+      case 'a': // hex float
+         h = (f[0] == 'A') ? hexu : hex;
+         fv = va_arg(va, double);
+         if (pr == -1)
+            pr = 6; // default is 6
+         // read the double into a string
+         if (stbsp__real_to_parts((stbsp__int64 *)&n64, &dp, fv))
+            fl |= STBSP__NEGATIVE;
+
+         s = num + 64;
+
+         stbsp__lead_sign(fl, lead);
+
+         if (dp == -1023)
+            dp = (n64) ? -1022 : 0;
+         else
+            n64 |= (((stbsp__uint64)1) << 52);
+         n64 <<= (64 - 56);
+         if (pr < 15)
+            n64 += ((((stbsp__uint64)8) << 56) >> (pr * 4));
+// add leading chars
+
+#ifdef STB_SPRINTF_MSVC_MODE
+         *s++ = '0';
+         *s++ = 'x';
+#else
+         lead[1 + lead[0]] = '0';
+         lead[2 + lead[0]] = 'x';
+         lead[0] += 2;
+#endif
+         *s++ = h[(n64 >> 60) & 15];
+         n64 <<= 4;
+         if (pr)
+            *s++ = stbsp__period;
+         sn = s;
+
+         // print the bits
+         n = pr;
+         if (n > 13)
+            n = 13;
+         if (pr > (stbsp__int32)n)
+            tz = pr - n;
+         pr = 0;
+         while (n--) {
+            *s++ = h[(n64 >> 60) & 15];
+            n64 <<= 4;
+         }
+
+         // print the expo
+         tail[1] = h[17];
+         if (dp < 0) {
+            tail[2] = '-';
+            dp = -dp;
+         } else
+            tail[2] = '+';
+         n = (dp >= 1000) ? 6 : ((dp >= 100) ? 5 : ((dp >= 10) ? 4 : 3));
+         tail[0] = (char)n;
+         for (;;) {
+            tail[n] = '0' + dp % 10;
+            if (n <= 3)
+               break;
+            --n;
+            dp /= 10;
+         }
+
+         dp = (int)(s - sn);
+         l = (int)(s - (num + 64));
+         s = num + 64;
+         cs = 1 + (3 << 24);
+         goto scopy;
+
+      case 'G': // float
+      case 'g': // float
+         h = (f[0] == 'G') ? hexu : hex;
+         fv = va_arg(va, double);
+         if (pr == -1)
+            pr = 6;
+         else if (pr == 0)
+            pr = 1; // default is 6
+         // read the double into a string
+         if (stbsp__real_to_str(&sn, &l, num, &dp, fv, (pr - 1) | 0x80000000))
+            fl |= STBSP__NEGATIVE;
+
+         // clamp the precision and delete extra zeros after clamp
+         n = pr;
+         if (l > (stbsp__uint32)pr)
+            l = pr;
+         while ((l > 1) && (pr) && (sn[l - 1] == '0')) {
+            --pr;
+            --l;
+         }
+
+         // should we use %e
+         if ((dp <= -4) || (dp > (stbsp__int32)n)) {
+            if (pr > (stbsp__int32)l)
+               pr = l - 1;
+            else if (pr)
+               --pr; // when using %e, there is one digit before the decimal
+            goto doexpfromg;
+         }
+         // this is the insane action to get the pr to match %g semantics for %f
+         if (dp > 0) {
+            pr = (dp < (stbsp__int32)l) ? l - dp : 0;
+         } else {
+            pr = -dp + ((pr > (stbsp__int32)l) ? (stbsp__int32) l : pr);
+         }
+         goto dofloatfromg;
+
+      case 'E': // float
+      case 'e': // float
+         h = (f[0] == 'E') ? hexu : hex;
+         fv = va_arg(va, double);
+         if (pr == -1)
+            pr = 6; // default is 6
+         // read the double into a string
+         if (stbsp__real_to_str(&sn, &l, num, &dp, fv, pr | 0x80000000))
+            fl |= STBSP__NEGATIVE;
+      doexpfromg:
+         tail[0] = 0;
+         stbsp__lead_sign(fl, lead);
+         if (dp == STBSP__SPECIAL) {
+            s = (char *)sn;
+            cs = 0;
+            pr = 0;
+            goto scopy;
+         }
+         s = num + 64;
+         // handle leading chars
+         *s++ = sn[0];
+
+         if (pr)
+            *s++ = stbsp__period;
+
+         // handle after decimal
+         if ((l - 1) > (stbsp__uint32)pr)
+            l = pr + 1;
+         for (n = 1; n < l; n++)
+            *s++ = sn[n];
+         // trailing zeros
+         tz = pr - (l - 1);
+         pr = 0;
+         // dump expo
+         tail[1] = h[0xe];
+         dp -= 1;
+         if (dp < 0) {
+            tail[2] = '-';
+            dp = -dp;
+         } else
+            tail[2] = '+';
+#ifdef STB_SPRINTF_MSVC_MODE
+         n = 5;
+#else
+         n = (dp >= 100) ? 5 : 4;
+#endif
+         tail[0] = (char)n;
+         for (;;) {
+            tail[n] = '0' + dp % 10;
+            if (n <= 3)
+               break;
+            --n;
+            dp /= 10;
+         }
+         cs = 1 + (3 << 24); // how many tens
+         goto flt_lead;
+
+      case 'f': // float
+         fv = va_arg(va, double);
+      doafloat:
+         // do kilos
+         if (fl & STBSP__METRIC_SUFFIX) {
+            double divisor;
+            divisor = 1000.0f;
+            if (fl & STBSP__METRIC_1024)
+               divisor = 1024.0;
+            while (fl < 0x4000000) {
+               if ((fv < divisor) && (fv > -divisor))
+                  break;
+               fv /= divisor;
+               fl += 0x1000000;
+            }
+         }
+         if (pr == -1)
+            pr = 6; // default is 6
+         // read the double into a string
+         if (stbsp__real_to_str(&sn, &l, num, &dp, fv, pr))
+            fl |= STBSP__NEGATIVE;
+      dofloatfromg:
+         tail[0] = 0;
+         stbsp__lead_sign(fl, lead);
+         if (dp == STBSP__SPECIAL) {
+            s = (char *)sn;
+            cs = 0;
+            pr = 0;
+            goto scopy;
+         }
+         s = num + 64;
+
+         // handle the three decimal varieties
+         if (dp <= 0) {
+            stbsp__int32 i;
+            // handle 0.000*000xxxx
+            *s++ = '0';
+            if (pr)
+               *s++ = stbsp__period;
+            n = -dp;
+            if ((stbsp__int32)n > pr)
+               n = pr;
+            i = n;
+            while (i) {
+               if ((((stbsp__uintptr)s) & 3) == 0)
+                  break;
+               *s++ = '0';
+               --i;
+            }
+            while (i >= 4) {
+               *(stbsp__uint32 *)s = 0x30303030;
+               s += 4;
+               i -= 4;
+            }
+            while (i) {
+               *s++ = '0';
+               --i;
+            }
+            if ((stbsp__int32)(l + n) > pr)
+               l = pr - n;
+            i = l;
+            while (i) {
+               *s++ = *sn++;
+               --i;
+            }
+            tz = pr - (n + l);
+            cs = 1 + (3 << 24); // how many tens did we write (for commas below)
+         } else {
+            cs = (fl & STBSP__TRIPLET_COMMA) ? ((600 - (stbsp__uint32)dp) % 3) : 0;
+            if ((stbsp__uint32)dp >= l) {
+               // handle xxxx000*000.0
+               n = 0;
+               for (;;) {
+                  if ((fl & STBSP__TRIPLET_COMMA) && (++cs == 4)) {
+                     cs = 0;
+                     *s++ = stbsp__comma;
+                  } else {
+                     *s++ = sn[n];
+                     ++n;
+                     if (n >= l)
+                        break;
+                  }
+               }
+               if (n < (stbsp__uint32)dp) {
+                  n = dp - n;
+                  if ((fl & STBSP__TRIPLET_COMMA) == 0) {
+                     while (n) {
+                        if ((((stbsp__uintptr)s) & 3) == 0)
+                           break;
+                        *s++ = '0';
+                        --n;
+                     }
+                     while (n >= 4) {
+                        *(stbsp__uint32 *)s = 0x30303030;
+                        s += 4;
+                        n -= 4;
+                     }
+                  }
+                  while (n) {
+                     if ((fl & STBSP__TRIPLET_COMMA) && (++cs == 4)) {
+                        cs = 0;
+                        *s++ = stbsp__comma;
+                     } else {
+                        *s++ = '0';
+                        --n;
+                     }
+                  }
+               }
+               cs = (int)(s - (num + 64)) + (3 << 24); // cs is how many tens
+               if (pr) {
+                  *s++ = stbsp__period;
+                  tz = pr;
+               }
+            } else {
+               // handle xxxxx.xxxx000*000
+               n = 0;
+               for (;;) {
+                  if ((fl & STBSP__TRIPLET_COMMA) && (++cs == 4)) {
+                     cs = 0;
+                     *s++ = stbsp__comma;
+                  } else {
+                     *s++ = sn[n];
+                     ++n;
+                     if (n >= (stbsp__uint32)dp)
+                        break;
+                  }
+               }
+               cs = (int)(s - (num + 64)) + (3 << 24); // cs is how many tens
+               if (pr)
+                  *s++ = stbsp__period;
+               if ((l - dp) > (stbsp__uint32)pr)
+                  l = pr + dp;
+               while (n < l) {
+                  *s++ = sn[n];
+                  ++n;
+               }
+               tz = pr - (l - dp);
+            }
+         }
+         pr = 0;
+
+         // handle k,m,g,t
+         if (fl & STBSP__METRIC_SUFFIX) {
+            char idx;
+            idx = 1;
+            if (fl & STBSP__METRIC_NOSPACE)
+               idx = 0;
+            tail[0] = idx;
+            tail[1] = ' ';
+            {
+               if (fl >> 24) { // SI kilo is 'k', JEDEC and SI kibits are 'K'.
+                  if (fl & STBSP__METRIC_1024)
+                     tail[idx + 1] = "_KMGT"[fl >> 24];
+                  else
+                     tail[idx + 1] = "_kMGT"[fl >> 24];
+                  idx++;
+                  // If printing kibits and not in jedec, add the 'i'.
+                  if (fl & STBSP__METRIC_1024 && !(fl & STBSP__METRIC_JEDEC)) {
+                     tail[idx + 1] = 'i';
+                     idx++;
+                  }
+                  tail[0] = idx;
+               }
+            }
+         };
+
+      flt_lead:
+         // get the length that we copied
+         l = (stbsp__uint32)(s - (num + 64));
+         s = num + 64;
+         goto scopy;
+#endif
+
+      case 'B': // upper binary
+      case 'b': // lower binary
+         h = (f[0] == 'B') ? hexu : hex;
+         lead[0] = 0;
+         if (fl & STBSP__LEADING_0X) {
+            lead[0] = 2;
+            lead[1] = '0';
+            lead[2] = h[0xb];
+         }
+         l = (8 << 4) | (1 << 8);
+         goto radixnum;
+
+      case 'o': // octal
+         h = hexu;
+         lead[0] = 0;
+         if (fl & STBSP__LEADING_0X) {
+            lead[0] = 1;
+            lead[1] = '0';
+         }
+         l = (3 << 4) | (3 << 8);
+         goto radixnum;
+
+      case 'p': // pointer
+         fl |= (sizeof(void *) == 8) ? STBSP__INTMAX : 0;
+         pr = sizeof(void *) * 2;
+         fl &= ~STBSP__LEADINGZERO; // 'p' only prints the pointer with zeros
+                                    // fall through - to X
+
+      case 'X': // upper hex
+      case 'x': // lower hex
+         h = (f[0] == 'X') ? hexu : hex;
+         l = (4 << 4) | (4 << 8);
+         lead[0] = 0;
+         if (fl & STBSP__LEADING_0X) {
+            lead[0] = 2;
+            lead[1] = '0';
+            lead[2] = h[16];
+         }
+      radixnum:
+         // get the number
+         if (fl & STBSP__INTMAX)
+            n64 = va_arg(va, stbsp__uint64);
+         else
+            n64 = va_arg(va, stbsp__uint32);
+
+         s = num + STBSP__NUMSZ;
+         dp = 0;
+         // clear tail, and clear leading if value is zero
+         tail[0] = 0;
+         if (n64 == 0) {
+            lead[0] = 0;
+            if (pr == 0) {
+               l = 0;
+               cs = 0;
+               goto scopy;
+            }
+         }
+         // convert to string
+         for (;;) {
+            *--s = h[n64 & ((1 << (l >> 8)) - 1)];
+            n64 >>= (l >> 8);
+            if (!((n64) || ((stbsp__int32)((num + STBSP__NUMSZ) - s) < pr)))
+               break;
+            if (fl & STBSP__TRIPLET_COMMA) {
+               ++l;
+               if ((l & 15) == ((l >> 4) & 15)) {
+                  l &= ~15;
+                  *--s = stbsp__comma;
+               }
+            }
+         };
+         // get the tens and the comma pos
+         cs = (stbsp__uint32)((num + STBSP__NUMSZ) - s) + ((((l >> 4) & 15)) << 24);
+         // get the length that we copied
+         l = (stbsp__uint32)((num + STBSP__NUMSZ) - s);
+         // copy it
+         goto scopy;
+
+      case 'u': // unsigned
+      case 'i':
+      case 'd': // integer
+         // get the integer and abs it
+         if (fl & STBSP__INTMAX) {
+            stbsp__int64 i64 = va_arg(va, stbsp__int64);
+            n64 = (stbsp__uint64)i64;
+            if ((f[0] != 'u') && (i64 < 0)) {
+               n64 = (stbsp__uint64)-i64;
+               fl |= STBSP__NEGATIVE;
+            }
+         } else {
+            stbsp__int32 i = va_arg(va, stbsp__int32);
+            n64 = (stbsp__uint32)i;
+            if ((f[0] != 'u') && (i < 0)) {
+               n64 = (stbsp__uint32)-i;
+               fl |= STBSP__NEGATIVE;
+            }
+         }
+
+#ifndef STB_SPRINTF_NOFLOAT
+         if (fl & STBSP__METRIC_SUFFIX) {
+            if (n64 < 1024)
+               pr = 0;
+            else if (pr == -1)
+               pr = 1;
+            fv = (double)(stbsp__int64)n64;
+            goto doafloat;
+         }
+#endif
+
+         // convert to string
+         s = num + STBSP__NUMSZ;
+         l = 0;
+
+         for (;;) {
+            // do in 32-bit chunks (avoid lots of 64-bit divides even with constant denominators)
+            char *o = s - 8;
+            if (n64 >= 100000000) {
+               n = (stbsp__uint32)(n64 % 100000000);
+               n64 /= 100000000;
+            } else {
+               n = (stbsp__uint32)n64;
+               n64 = 0;
+            }
+            if ((fl & STBSP__TRIPLET_COMMA) == 0) {
+               do {
+                  s -= 2;
+                  *(stbsp__uint16 *)s = *(stbsp__uint16 *)&stbsp__digitpair.pair[(n % 100) * 2];
+                  n /= 100;
+               } while (n);
+            }
+            while (n) {
+               if ((fl & STBSP__TRIPLET_COMMA) && (l++ == 3)) {
+                  l = 0;
+                  *--s = stbsp__comma;
+                  --o;
+               } else {
+                  *--s = (char)(n % 10) + '0';
+                  n /= 10;
+               }
+            }
+            if (n64 == 0) {
+               if ((s[0] == '0') && (s != (num + STBSP__NUMSZ)))
+                  ++s;
+               break;
+            }
+            while (s != o)
+               if ((fl & STBSP__TRIPLET_COMMA) && (l++ == 3)) {
+                  l = 0;
+                  *--s = stbsp__comma;
+                  --o;
+               } else {
+                  *--s = '0';
+               }
+         }
+
+         tail[0] = 0;
+         stbsp__lead_sign(fl, lead);
+
+         // get the length that we copied
+         l = (stbsp__uint32)((num + STBSP__NUMSZ) - s);
+         if (l == 0) {
+            *--s = '0';
+            l = 1;
+         }
+         cs = l + (3 << 24);
+         if (pr < 0)
+            pr = 0;
+
+      scopy:
+         // get fw=leading/trailing space, pr=leading zeros
+         if (pr < (stbsp__int32)l)
+            pr = l;
+         n = pr + lead[0] + tail[0] + tz;
+         if (fw < (stbsp__int32)n)
+            fw = n;
+         fw -= n;
+         pr -= l;
+
+         // handle right justify and leading zeros
+         if ((fl & STBSP__LEFTJUST) == 0) {
+            if (fl & STBSP__LEADINGZERO) // if leading zeros, everything is in pr
+            {
+               pr = (fw > pr) ? fw : pr;
+               fw = 0;
+            } else {
+               fl &= ~STBSP__TRIPLET_COMMA; // if no leading zeros, then no commas
+            }
+         }
+
+         // copy the spaces and/or zeros
+         if (fw + pr) {
+            stbsp__int32 i;
+            stbsp__uint32 c;
+
+            // copy leading spaces (or when doing %8.4d stuff)
+            if ((fl & STBSP__LEFTJUST) == 0)
+               while (fw > 0) {
+                  stbsp__cb_buf_clamp(i, fw);
+                  fw -= i;
+                  while (i) {
+                     if ((((stbsp__uintptr)bf) & 3) == 0)
+                        break;
+                     *bf++ = ' ';
+                     --i;
+                  }
+                  while (i >= 4) {
+                     *(stbsp__uint32 *)bf = 0x20202020;
+                     bf += 4;
+                     i -= 4;
+                  }
+                  while (i) {
+                     *bf++ = ' ';
+                     --i;
+                  }
+                  stbsp__chk_cb_buf(1);
+               }
+
+            // copy leader
+            sn = lead + 1;
+            while (lead[0]) {
+               stbsp__cb_buf_clamp(i, lead[0]);
+               lead[0] -= (char)i;
+               while (i) {
+                  *bf++ = *sn++;
+                  --i;
+               }
+               stbsp__chk_cb_buf(1);
+            }
+
+            // copy leading zeros
+            c = cs >> 24;
+            cs &= 0xffffff;
+            cs = (fl & STBSP__TRIPLET_COMMA) ? ((stbsp__uint32)(c - ((pr + cs) % (c + 1)))) : 0;
+            while (pr > 0) {
+               stbsp__cb_buf_clamp(i, pr);
+               pr -= i;
+               if ((fl & STBSP__TRIPLET_COMMA) == 0) {
+                  while (i) {
+                     if ((((stbsp__uintptr)bf) & 3) == 0)
+                        break;
+                     *bf++ = '0';
+                     --i;
+                  }
+                  while (i >= 4) {
+                     *(stbsp__uint32 *)bf = 0x30303030;
+                     bf += 4;
+                     i -= 4;
+                  }
+               }
+               while (i) {
+                  if ((fl & STBSP__TRIPLET_COMMA) && (cs++ == c)) {
+                     cs = 0;
+                     *bf++ = stbsp__comma;
+                  } else
+                     *bf++ = '0';
+                  --i;
+               }
+               stbsp__chk_cb_buf(1);
+            }
+         }
+
+         // copy leader if there is still one
+         sn = lead + 1;
+         while (lead[0]) {
+            stbsp__int32 i;
+            stbsp__cb_buf_clamp(i, lead[0]);
+            lead[0] -= (char)i;
+            while (i) {
+               *bf++ = *sn++;
+               --i;
+            }
+            stbsp__chk_cb_buf(1);
+         }
+
+         // copy the string
+         n = l;
+         while (n) {
+            stbsp__int32 i;
+            stbsp__cb_buf_clamp(i, n);
+            n -= i;
+            STBSP__UNALIGNED(while (i >= 4) {
+               *(stbsp__uint32 volatile *)bf = *(stbsp__uint32 volatile *)s;
+               bf += 4;
+               s += 4;
+               i -= 4;
+            })
+            while (i) {
+               *bf++ = *s++;
+               --i;
+            }
+            stbsp__chk_cb_buf(1);
+         }
+
+         // copy trailing zeros
+         while (tz) {
+            stbsp__int32 i;
+            stbsp__cb_buf_clamp(i, tz);
+            tz -= i;
+            while (i) {
+               if ((((stbsp__uintptr)bf) & 3) == 0)
+                  break;
+               *bf++ = '0';
+               --i;
+            }
+            while (i >= 4) {
+               *(stbsp__uint32 *)bf = 0x30303030;
+               bf += 4;
+               i -= 4;
+            }
+            while (i) {
+               *bf++ = '0';
+               --i;
+            }
+            stbsp__chk_cb_buf(1);
+         }
+
+         // copy tail if there is one
+         sn = tail + 1;
+         while (tail[0]) {
+            stbsp__int32 i;
+            stbsp__cb_buf_clamp(i, tail[0]);
+            tail[0] -= (char)i;
+            while (i) {
+               *bf++ = *sn++;
+               --i;
+            }
+            stbsp__chk_cb_buf(1);
+         }
+
+         // handle the left justify
+         if (fl & STBSP__LEFTJUST)
+            if (fw > 0) {
+               while (fw) {
+                  stbsp__int32 i;
+                  stbsp__cb_buf_clamp(i, fw);
+                  fw -= i;
+                  while (i) {
+                     if ((((stbsp__uintptr)bf) & 3) == 0)
+                        break;
+                     *bf++ = ' ';
+                     --i;
+                  }
+                  while (i >= 4) {
+                     *(stbsp__uint32 *)bf = 0x20202020;
+                     bf += 4;
+                     i -= 4;
+                  }
+                  while (i--)
+                     *bf++ = ' ';
+                  stbsp__chk_cb_buf(1);
+               }
+            }
+         break;
+
+      default: // unknown, just copy code
+         s = num + STBSP__NUMSZ - 1;
+         *s = f[0];
+         l = 1;
+         fw = fl = 0;
+         lead[0] = 0;
+         tail[0] = 0;
+         pr = 0;
+         dp = 0;
+         cs = 0;
+         goto scopy;
+      }
+      ++f;
+   }
+endfmt:
+
+   if (!callback)
+      *bf = 0;
+   else
+      stbsp__flush_cb();
+
+done:
+   return tlen + (int)(bf - buf);
+}
+
+// cleanup
+#undef STBSP__LEFTJUST
+#undef STBSP__LEADINGPLUS
+#undef STBSP__LEADINGSPACE
+#undef STBSP__LEADING_0X
+#undef STBSP__LEADINGZERO
+#undef STBSP__INTMAX
+#undef STBSP__TRIPLET_COMMA
+#undef STBSP__NEGATIVE
+#undef STBSP__METRIC_SUFFIX
+#undef STBSP__NUMSZ
+#undef stbsp__chk_cb_bufL
+#undef stbsp__chk_cb_buf
+#undef stbsp__flush_cb
+#undef stbsp__cb_buf_clamp
+
+// ============================================================================
+//   wrapper functions
+
+STBSP__PUBLICDEF int STB_SPRINTF_DECORATE(sprintf)(char *buf, char const *fmt, ...) // printf-style write into buf; takes no size argument, so buf must hold the whole result
+{
+   int result;
+   va_list va;
+   va_start(va, fmt);
+   result = STB_SPRINTF_DECORATE(vsprintfcb)(0, 0, buf, fmt, va); // callback = 0, user = 0: the formatter writes directly into buf
+   va_end(va);
+   return result; // pass through the value returned by vsprintfcb
+}
+
+typedef struct stbsp__context { // state shared between vsnprintf and its output callbacks
+   char *buf; // next write position in the destination (advanced by stbsp__clamp_callback)
+   int count; // bytes of the destination that are still unused
+   int length; // running total of bytes produced by the formatter, including bytes that did not fit
+   char tmp[STB_SPRINTF_MIN]; // scratch buffer handed to the formatter when it cannot write into the destination directly
+} stbsp__context;
+
+static char *stbsp__clamp_callback(const char *buf, void *user, int len) // output callback for vsnprintf: stores at most c->count bytes of each chunk, returns the buffer for the next chunk
+{
+   stbsp__context *c = (stbsp__context *)user;
+   c->length += len; // count every produced byte, even the ones that will not fit
+
+   if (len > c->count)
+      len = c->count; // clamp the chunk to the space that is left
+
+   if (len) {
+      if (buf != c->buf) { // chunk is not already sitting at the destination (e.g. it was formatted into c->tmp), so copy it
+         const char *s, *se;
+         char *d;
+         d = c->buf;
+         s = buf;
+         se = buf + len;
+         do {
+            *d++ = *s++;
+         } while (s < se);
+      }
+      c->buf += len; // advance past the bytes now stored in the destination
+      c->count -= len;
+   }
+
+   if (c->count <= 0)
+      return c->tmp; // destination is full: later chunks are formatted into scratch
+   return (c->count >= STB_SPRINTF_MIN) ? c->buf : c->tmp; // go direct into buffer if you can
+}
+
+static char * stbsp__count_clamp_callback( const char * buf, void * user, int len ) // output callback for the length-only mode of vsnprintf
+{
+   stbsp__context * c = (stbsp__context*)user;
+   (void) sizeof(buf); // references buf without evaluating it (presumably to avoid an unused-parameter warning)
+
+   c->length += len;
+   return c->tmp; // always hand back the scratch buffer: the output is only counted, never stored
+}
+// Bounded formatted write: stores at most count-1 bytes plus a terminating NUL in buf and returns the total number of bytes the formatter produced. With count <= 0 or buf == NULL nothing is stored and only that total is computed.
+STBSP__PUBLICDEF int STB_SPRINTF_DECORATE( vsnprintf )( char * buf, int count, char const * fmt, va_list va )
+{
+   stbsp__context c;
+
+   if ( (count <= 0) || !buf ) // no usable destination: the terminator below would otherwise be written to buf[count - 1], i.e. before the buffer
+   {
+      c.length = 0;
+
+      STB_SPRINTF_DECORATE( vsprintfcb )( stbsp__count_clamp_callback, &c, c.tmp, fmt, va );
+   }
+   else
+   {
+      int l;
+
+      c.buf = buf;
+      c.count = count;
+      c.length = 0;
+
+      STB_SPRINTF_DECORATE( vsprintfcb )( stbsp__clamp_callback, &c, stbsp__clamp_callback(0,&c,0), fmt, va ); // the priming call with len 0 only selects the first output buffer
+
+      // zero-terminate
+      l = (int)( c.buf - buf );
+      if ( l >= count ) // should never be greater, only equal (or less) than count
+         l = count - 1;
+      buf[l] = 0;
+   }
+
+   return c.length;
+}
+
+STBSP__PUBLICDEF int STB_SPRINTF_DECORATE(snprintf)(char *buf, int count, char const *fmt, ...) // variadic front end for vsnprintf
+{
+   int result;
+   va_list va;
+   va_start(va, fmt);
+
+   result = STB_SPRINTF_DECORATE(vsnprintf)(buf, count, fmt, va); // clamping and termination are done by vsnprintf
+   va_end(va);
+
+   return result;
+}
+
+STBSP__PUBLICDEF int STB_SPRINTF_DECORATE(vsprintf)(char *buf, char const *fmt, va_list va) // va_list form of sprintf: takes no size argument for buf
+{
+   return STB_SPRINTF_DECORATE(vsprintfcb)(0, 0, buf, fmt, va); // callback = 0, user = 0: the formatter writes directly into buf
+}
+
+// =======================================================================
+//   low level float utility functions
+
+#ifndef STB_SPRINTF_NOFLOAT
+
+// copies the first 8 bytes of src into dest one char at a time, so the bits are transferred without violating strict aliasing (this compiles to nothing on /Ox)
+#define STBSP__COPYFP(dest, src)                   \
+   {                                               \
+      int cn;                                      \
+      for (cn = 0; cn < 8; cn++)                   \
+         ((char *)&dest)[cn] = ((char *)&src)[cn]; \
+   }
+
+// get float info: stores the low 52 bits of value's bit pattern in *bits and the 11-bit exponent field minus 1023 in *expo; returns the sign bit (bit 63)
+static stbsp__int32 stbsp__real_to_parts(stbsp__int64 *bits, stbsp__int32 *expo, double value)
+{
+   double d;
+   stbsp__int64 b = 0;
+
+   // take a local copy of the value so its raw bit pattern can be read (no rounding is done here)
+   d = value;
+
+   STBSP__COPYFP(b, d);
+
+   *bits = b & ((((stbsp__uint64)1) << 52) - 1);
+   *expo = (stbsp__int32)(((b >> 52) & 2047) - 1023);
+
+   return (stbsp__int32)((stbsp__uint64) b >> 63);
+}
+
+static double const stbsp__bot[23] = { // 10^0 .. 10^22
+   1e+000, 1e+001, 1e+002, 1e+003, 1e+004, 1e+005, 1e+006, 1e+007, 1e+008, 1e+009, 1e+010, 1e+011,
+   1e+012, 1e+013, 1e+014, 1e+015, 1e+016, 1e+017, 1e+018, 1e+019, 1e+020, 1e+021, 1e+022
+};
+static double const stbsp__negbot[22] = { // 10^-1 .. 10^-22 (index 0 is 10^-1)
+   1e-001, 1e-002, 1e-003, 1e-004, 1e-005, 1e-006, 1e-007, 1e-008, 1e-009, 1e-010, 1e-011,
+   1e-012, 1e-013, 1e-014, 1e-015, 1e-016, 1e-017, 1e-018, 1e-019, 1e-020, 1e-021, 1e-022
+};
+static double const stbsp__negboterr[22] = { // low-order correction terms, indexed like stbsp__negbot and applied with it in stbsp__raise_to_power10
+   -5.551115123125783e-018,  -2.0816681711721684e-019, -2.0816681711721686e-020, -4.7921736023859299e-021, -8.1803053914031305e-022, 4.5251888174113741e-023,
+   4.5251888174113739e-024,  -2.0922560830128471e-025, -6.2281591457779853e-026, -3.6432197315497743e-027, 6.0503030718060191e-028,  2.0113352370744385e-029,
+   -3.0373745563400371e-030, 1.1806906454401013e-032,  -7.7705399876661076e-032, 2.0902213275965398e-033,  -7.1542424054621921e-034, -7.1542424054621926e-035,
+   2.4754073164739869e-036,  5.4846728545790429e-037,  9.2462547772103625e-038,  -4.8596774326570872e-039
+};
+static double const stbsp__top[13] = { // 10^23, 10^46, ... 10^299 (steps of 23 decimal exponents)
+   1e+023, 1e+046, 1e+069, 1e+092, 1e+115, 1e+138, 1e+161, 1e+184, 1e+207, 1e+230, 1e+253, 1e+276, 1e+299
+};
+static double const stbsp__negtop[13] = { // 10^-23, 10^-46, ... 10^-299
+   1e-023, 1e-046, 1e-069, 1e-092, 1e-115, 1e-138, 1e-161, 1e-184, 1e-207, 1e-230, 1e-253, 1e-276, 1e-299
+};
+static double const stbsp__toperr[13] = { // low-order correction terms, indexed like stbsp__top and applied with it in stbsp__raise_to_power10
+   8388608,
+   6.8601809640529717e+028,
+   -7.253143638152921e+052,
+   -4.3377296974619174e+075,
+   -1.5559416129466825e+098,
+   -3.2841562489204913e+121,
+   -3.7745893248228135e+144,
+   -1.7356668416969134e+167,
+   -3.8893577551088374e+190,
+   -9.9566444326005119e+213,
+   6.3641293062232429e+236,
+   -5.2069140800249813e+259,
+   -5.2504760255204387e+282
+};
+static double const stbsp__negtoperr[13] = { // low-order correction terms, indexed like stbsp__negtop and applied with it in stbsp__raise_to_power10
+   3.9565301985100693e-040,  -2.299904345391321e-063,  3.6506201437945798e-086,  1.1875228833981544e-109,
+   -5.0644902316928607e-132, -6.7156837247865426e-155, -2.812077463003139e-178,  -5.7778912386589953e-201,
+   7.4997100559334532e-224,  -4.6439668915134491e-247, -6.3691100762962136e-270, -9.436808465446358e-293,
+   8.0970921678014997e-317
+};
+
+#if defined(_MSC_VER) && (_MSC_VER <= 1200) // this branch spells the 64-bit constants without the ULL suffix
+static stbsp__uint64 const stbsp__powten[20] = { // stbsp__powten[i] == 10^i for i = 0 .. 19
+   1,
+   10,
+   100,
+   1000,
+   10000,
+   100000,
+   1000000,
+   10000000,
+   100000000,
+   1000000000,
+   10000000000,
+   100000000000,
+   1000000000000,
+   10000000000000,
+   100000000000000,
+   1000000000000000,
+   10000000000000000,
+   100000000000000000,
+   1000000000000000000,
+   10000000000000000000U
+};
+#define stbsp__tento19th ((stbsp__uint64)1000000000000000000) // 10^18, the smallest value with 19 decimal digits
+#else
+static stbsp__uint64 const stbsp__powten[20] = { // stbsp__powten[i] == 10^i for i = 0 .. 19
+   1,
+   10,
+   100,
+   1000,
+   10000,
+   100000,
+   1000000,
+   10000000,
+   100000000,
+   1000000000,
+   10000000000ULL,
+   100000000000ULL,
+   1000000000000ULL,
+   10000000000000ULL,
+   100000000000000ULL,
+   1000000000000000ULL,
+   10000000000000000ULL,
+   100000000000000000ULL,
+   1000000000000000000ULL,
+   10000000000000000000ULL
+};
+#define stbsp__tento19th (1000000000000000000ULL) // 10^18, the smallest value with 19 decimal digits
+#endif
+// stbsp__ddmulthi: oh = xh * yh, and ol = the rounding error of that product, obtained by splitting each operand into a high part (low 27 bits of the pattern cleared) and the remainder
+#define stbsp__ddmulthi(oh, ol, xh, yh)                            \
+   {                                                               \
+      double ahi = 0, alo, bhi = 0, blo;                           \
+      stbsp__int64 bt;                                             \
+      oh = xh * yh;                                                \
+      STBSP__COPYFP(bt, xh);                                       \
+      bt &= ((~(stbsp__uint64)0) << 27);                           \
+      STBSP__COPYFP(ahi, bt);                                      \
+      alo = xh - ahi;                                              \
+      STBSP__COPYFP(bt, yh);                                       \
+      bt &= ((~(stbsp__uint64)0) << 27);                           \
+      STBSP__COPYFP(bhi, bt);                                      \
+      blo = yh - bhi;                                              \
+      ol = ((ahi * bhi - oh) + ahi * blo + alo * bhi) + alo * blo; \
+   }
+// stbsp__ddtoS64: converts the double-double (xh, xl) to a 64-bit integer in ob: truncate xh first, then add the truncated sum of what is left of xh plus xl
+#define stbsp__ddtoS64(ob, xh, xl)          \
+   {                                        \
+      double ahi = 0, alo, vh, t;           \
+      ob = (stbsp__int64)xh;                \
+      vh = (double)ob;                      \
+      ahi = (xh - vh);                      \
+      t = (ahi - xh);                       \
+      alo = (xh - (ahi - t)) - (vh + t);    \
+      ob += (stbsp__int64)(ahi + alo + xl); \
+   }
+// stbsp__ddrenorm: renormalizes the pair in place: oh becomes the rounded sum oh + ol, ol becomes the part of ol that the sum did not absorb
+#define stbsp__ddrenorm(oh, ol) \
+   {                            \
+      double s;                 \
+      s = oh + ol;              \
+      ol = ol - (s - oh);       \
+      oh = s;                   \
+   }
+// stbsp__ddmultlo: adds the cross terms xh * yl + xl * yh to the low word ol (oh is not used)
+#define stbsp__ddmultlo(oh, ol, xh, xl, yh, yl) ol = ol + (xh * yl + xl * yh);
+// stbsp__ddmultlos: adds the single cross term xh * yl to the low word ol (oh is not used)
+#define stbsp__ddmultlos(oh, ol, xh, yl) ol = ol + (xh * yl);
+// Multiplies d by 10^power using the tables above and returns the result as a renormalized double-double pair (*ohi = high word, *olo = low word).
+static void stbsp__raise_to_power10(double *ohi, double *olo, double d, stbsp__int32 power) // power can be -323 to +350
+{
+   double ph, pl;
+   if ((power >= 0) && (power <= 22)) { // small non-negative power: one multiply by stbsp__bot[power] is enough
+      stbsp__ddmulthi(ph, pl, d, stbsp__bot[power]);
+   } else {
+      stbsp__int32 e, et, eb;
+      double p2h, p2l;
+
+      e = power;
+      if (power < 0)
+         e = -e;
+      et = (e * 0x2c9) >> 14; /* fixed-point multiply that approximates e / 23 */
+      if (et > 13)
+         et = 13; // the top / negtop tables have 13 entries
+      eb = e - (et * 23); // what is left after the steps of 23; handled with stbsp__bot / stbsp__negbot
+
+      ph = d;
+      pl = 0.0;
+      if (power < 0) {
+         if (eb) {
+            --eb; // stbsp__negbot[0] is 10^-1, so the index is the exponent minus one
+            stbsp__ddmulthi(ph, pl, d, stbsp__negbot[eb]);
+            stbsp__ddmultlos(ph, pl, d, stbsp__negboterr[eb]);
+         }
+         if (et) {
+            stbsp__ddrenorm(ph, pl);
+            --et; // stbsp__negtop[0] is 10^-23
+            stbsp__ddmulthi(p2h, p2l, ph, stbsp__negtop[et]);
+            stbsp__ddmultlo(p2h, p2l, ph, pl, stbsp__negtop[et], stbsp__negtoperr[et]);
+            ph = p2h;
+            pl = p2l;
+         }
+      } else {
+         if (eb) {
+            e = eb;
+            if (eb > 22)
+               eb = 22; // stbsp__bot stops at 10^22; any excess is applied by the second multiply below
+            e -= eb;
+            stbsp__ddmulthi(ph, pl, d, stbsp__bot[eb]);
+            if (e) {
+               stbsp__ddrenorm(ph, pl);
+               stbsp__ddmulthi(p2h, p2l, ph, stbsp__bot[e]);
+               stbsp__ddmultlos(p2h, p2l, stbsp__bot[e], pl);
+               ph = p2h;
+               pl = p2l;
+            }
+         }
+         if (et) {
+            stbsp__ddrenorm(ph, pl);
+            --et; // stbsp__top[0] is 10^23
+            stbsp__ddmulthi(p2h, p2l, ph, stbsp__top[et]);
+            stbsp__ddmultlo(p2h, p2l, ph, pl, stbsp__top[et], stbsp__toperr[et]);
+            ph = p2h;
+            pl = p2l;
+         }
+      }
+   }
+   stbsp__ddrenorm(ph, pl);
+   *ohi = ph;
+   *olo = pl;
+}
+
+// given a float value, returns the significant digits as text via *start / *len (the return value is the sign bit), and the position of the
+//   decimal point in decimal_pos.  +/-INF and NAN are specified by special values
+//   returned in the decimal_pos parameter.
+// frac_digits is absolute normally, but if you want it counted from the first significant digit (for %g and %e), or in 0x80000000
+static stbsp__int32 stbsp__real_to_str(char const **start, stbsp__uint32 *len, char *out, stbsp__int32 *decimal_pos, double value, stbsp__uint32 frac_digits)
+{
+   double d;
+   stbsp__int64 bits = 0;
+   stbsp__int32 expo, e, ng, tens;
+
+   d = value;
+   STBSP__COPYFP(bits, d);
+   expo = (stbsp__int32)((bits >> 52) & 2047);
+   ng = (stbsp__int32)((stbsp__uint64) bits >> 63);
+   if (ng)
+      d = -d;
+
+   if (expo == 2047) // is nan or inf?
+   {
+      *start = (bits & ((((stbsp__uint64)1) << 52) - 1)) ? "NaN" : "Inf";
+      *decimal_pos = STBSP__SPECIAL;
+      *len = 3;
+      return ng;
+   }
+
+   if (expo == 0) // is zero or denormal
+   {
+      if (((stbsp__uint64) bits << 1) == 0) // do zero
+      {
+         *decimal_pos = 1;
+         *start = out;
+         out[0] = '0';
+         *len = 1;
+         return ng;
+      }
+      // find the right expo for denormals
+      {
+         stbsp__int64 v = ((stbsp__uint64)1) << 51;
+         while ((bits & v) == 0) {
+            --expo;
+            v >>= 1;
+         }
+      }
+   }
+
+   // find the decimal exponent as well as the decimal bits of the value
+   {
+      double ph, pl;
+
+      // log10 estimate - very specifically tweaked to hit or undershoot by no more than 1 of log10 of all expos 1..2046
+      tens = expo - 1023;
+      tens = (tens < 0) ? ((tens * 617) / 2048) : (((tens * 1233) / 4096) + 1);
+
+      // move the significant bits into position and stick them into an int
+      stbsp__raise_to_power10(&ph, &pl, d, 18 - tens);
+
+      // get as much precision from the double-double as possible
+      stbsp__ddtoS64(bits, ph, pl);
+
+      // check if we undershot
+      if (((stbsp__uint64)bits) >= stbsp__tento19th)
+         ++tens;
+   }
+
+   // now do the rounding in integer land
+   frac_digits = (frac_digits & 0x80000000) ? ((frac_digits & 0x7fffffff) + 1) : (tens + frac_digits); // mask keeps all 31 bits below the 0x80000000 flag
+   if ((frac_digits < 24)) {
+      stbsp__uint32 dg = 1;
+      if ((stbsp__uint64)bits >= stbsp__powten[9])
+         dg = 10;
+      while ((stbsp__uint64)bits >= stbsp__powten[dg]) {
+         ++dg;
+         if (dg == 20)
+            goto noround;
+      }
+      if (frac_digits < dg) {
+         stbsp__uint64 r;
+         // add 0.5 at the right position and round
+         e = dg - frac_digits;
+         if ((stbsp__uint32)e >= 24)
+            goto noround;
+         r = stbsp__powten[e];
+         bits = bits + (r / 2);
+         if ((stbsp__uint64)bits >= stbsp__powten[dg])
+            ++tens;
+         bits /= r;
+      }
+   noround:;
+   }
+
+   // kill long trailing runs of zeros
+   if (bits) {
+      stbsp__uint32 n;
+      for (;;) {
+         if (bits <= 0xffffffff)
+            break;
+         if (bits % 1000)
+            goto donez;
+         bits /= 1000;
+      }
+      n = (stbsp__uint32)bits;
+      while ((n % 1000) == 0)
+         n /= 1000;
+      bits = n;
+   donez:;
+   }
+
+   // convert to string
+   out += 64;
+   e = 0;
+   for (;;) {
+      stbsp__uint32 n;
+      char *o = out - 8;
+      // do the conversion in chunks of U32s (avoid most 64-bit divides, worth it, constant denominators be damned)
+      if (bits >= 100000000) {
+         n = (stbsp__uint32)(bits % 100000000);
+         bits /= 100000000;
+      } else {
+         n = (stbsp__uint32)bits;
+         bits = 0;
+      }
+      while (n) {
+         out -= 2;
+         *(stbsp__uint16 *)out = *(stbsp__uint16 *)&stbsp__digitpair.pair[(n % 100) * 2];
+         n /= 100;
+         e += 2;
+      }
+      if (bits == 0) {
+         if ((e) && (out[0] == '0')) {
+            ++out;
+            --e;
+         }
+         break;
+      }
+      while (out != o) {
+         *--out = '0';
+         ++e;
+      }
+   }
+
+   *decimal_pos = tens;
+   *start = out;
+   *len = e;
+   return ng;
+}
+
+#undef stbsp__ddmulthi
+#undef stbsp__ddrenorm
+#undef stbsp__ddmultlo
+#undef stbsp__ddmultlos
+#undef STBSP__SPECIAL
+#undef STBSP__COPYFP
+
+#endif // STB_SPRINTF_NOFLOAT
+
+// clean up
+#undef stbsp__uint16
+#undef stbsp__uint32
+#undef stbsp__int32
+#undef stbsp__uint64
+#undef stbsp__int64
+#undef STBSP__UNALIGNED
+
+#endif // STB_SPRINTF_IMPLEMENTATION
+
+/*
+------------------------------------------------------------------------------
+This software is available under 2 licenses -- choose whichever you prefer.
+------------------------------------------------------------------------------
+ALTERNATIVE A - MIT License
+Copyright (c) 2017 Sean Barrett
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal in
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+of the Software, and to permit persons to whom the Software is furnished to do
+so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+------------------------------------------------------------------------------
+ALTERNATIVE B - Public Domain (www.unlicense.org)
+This is free and unencumbered software released into the public domain.
+Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
+software, either in source code form or as a compiled binary, for any purpose,
+commercial or non-commercial, and by any means.
+In jurisdictions that recognize copyright laws, the author or authors of this
+software dedicate any and all copyright interest in the software to the public
+domain. We make this dedication for the benefit of the public at large and to
+the detriment of our heirs and successors. We intend this dedication to be an
+overt act of relinquishment in perpetuity of all present and future rights to
+this software under copyright law.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+------------------------------------------------------------------------------
+*/
diff --git a/lib/c/glad/glad.c b/src/lib/glad/glad.c
similarity index 100%
rename from lib/c/glad/glad.c
rename to src/lib/glad/glad.c
diff --git a/lib/c/glad/glad.h b/src/lib/glad/glad.h
similarity index 100%
rename from lib/c/glad/glad.h
rename to src/lib/glad/glad.h
diff --git a/lib/c/loaders/stb_image.h b/src/lib/loaders/stb_image.h
similarity index 100%
rename from lib/c/loaders/stb_image.h
rename to src/lib/loaders/stb_image.h
diff --git a/lib/c/loaders/tinyobj.h b/src/lib/loaders/tinyobj.h
similarity index 100%
rename from lib/c/loaders/tinyobj.h
rename to src/lib/loaders/tinyobj.h
diff --git a/src/main.cpp b/src/main.cpp
index 675abec..33a613f 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -1,39 +1,33 @@
-#include <bitset>
-#include <array>
-#include <span>
-#include <cstdint>
 #include <iostream>
-#include <string>
-#include <algorithm>
 #include <vector>
 #include <optional>
 
-#include "glad/glad.h"
+#include "lib/glad/glad.h"
 #include <glm/ext/matrix_transform.hpp>
 #include <GLFW/glfw3.h>
 #include <glm/glm.hpp>
 #include <glm/gtx/quaternion.hpp>
 #include <glm/gtc/type_ptr.hpp>
 #include <glm/gtc/matrix_transform.hpp>
-#include "loaders/stb_image.h"
+#include "lib/loaders/stb_image.h"
 
-#include "gfx/geometry.h"
 #include "gfx/Texture.h"
 #include "gfx/Mesh.h"
 #include "gfx/Shader.h"
 #include "gfx/Color.h"
 #include "VoxelSpace.h"
 #include "SomaSolve.h"
+#include "lib/djstdlib/core.cpp"
 
 struct Entity;
 struct Polycube;
 struct SceneGraphNode;
-auto new_entity() -> int;
-auto get_entity(int id) -> Entity*;
-auto get_scene_graph_node(int id) -> SceneGraphNode*;
-auto new_graph_node() -> int;
+int new_entity();
+Entity *get_entity(int id);
+SceneGraphNode *get_scene_graph_node(int id); 
+int new_graph_node();
 
-auto print_mat(glm::mat4* matrix) -> void {
+void print_mat(glm::mat4* matrix) {
     auto mat = *matrix;
     std::cout << mat[0][0] << mat[0][1] << mat[0][2] << mat[0][3] << std::endl;
     std::cout << mat[1][0] << mat[1][1] << mat[1][2] << mat[1][3] << std::endl;
@@ -49,19 +43,19 @@ struct Camera {
     glm::vec3 up;
     glm::vec3 target;
 
-    auto init(float aspect_ratio = 800.0f / 600.0f) -> void {
+    void init(float aspect_ratio = 800.0f / 600.0f) {
         view = glm::mat4();
         proj = glm::perspective(glm::radians(45.0f), aspect_ratio, 0.1f, 100.0f);
         pos = glm::vec3(0.0f);
         up = glm::vec3(0.0f, 1.0f, 0.0f);
     }
 
-    auto look_at(float x, float y, float z) -> void {
+    void look_at(float x, float y, float z) {
         target = glm::vec3(x, y, z);
         view = glm::lookAt(pos, target, up);
     }
 
-    auto set_up(float up_x, float up_y, float up_z) -> void {
+    void set_up(float up_x, float up_y, float up_z) {
         up = glm::vec3(up_x, up_y, up_z);
     }
 };
@@ -69,7 +63,7 @@ struct Camera {
 struct GlobalAppState {
     int current_polycube;
     int last_polycube_visible;
-    Shader* active_shader;
+    Shader *active_shader;
     std::vector<Polycube> polycubes;
 };
 GlobalAppState app_state;
@@ -80,8 +74,8 @@ struct WindowDims {
 };
 
 struct Entity {
-    Mesh* mesh;
-    Texture* tex;
+    Mesh *mesh;
+    Texture *tex;
     bool visible;
     int scene_graph_node;
 };
@@ -95,19 +89,19 @@ struct SceneGraphNode {
     std::vector<int> children;
     std::optional<int> entity;
 
-    auto reset() -> void {
+    void reset() {
         scale = glm::vec3(1.0f, 1.0f, 1.0f);
         translation = glm::vec3(0.0f, 0.0f, 0.0f);
         rotation = glm::quat(0.0f, 0.0f, 0.0f, 0.0f);
     }
     
-    auto init() -> void {
+    void init() {
         reset();
         local = glm::mat4(1.0f);
         world = local;
     }
 
-    auto update_local() -> void {
+    void update_local() {
         local = glm::scale(
             glm::translate(
                 glm::mat4(1.0f), 
@@ -122,8 +116,8 @@ struct Polycube {
     int graph_node;
     glm::vec3 color;
 
-    auto show() -> void {
-        auto node = get_scene_graph_node(graph_node);
+    void show() {
+        SceneGraphNode *node = get_scene_graph_node(graph_node);
         for (auto &child : node->children) {
             auto node = get_scene_graph_node(child);
             if (node->entity) {
@@ -132,19 +126,19 @@ struct Polycube {
         }
     }
 
-    auto hide() -> void {
-        auto node = get_scene_graph_node(graph_node);
-        for (auto &child : node->children) {
-            auto node = get_scene_graph_node(child);
+    void hide() {
+        SceneGraphNode *node = get_scene_graph_node(graph_node);
+        for (int &child : node->children) {
+            SceneGraphNode *node = get_scene_graph_node(child);
             if (node->entity) {
                 get_entity(*node->entity)->visible = false;
             }
         }
     }
 
-    auto get_centre() -> glm::vec3 {
-        auto centre = glm::vec3(0.0f);
-        for (auto &child : get_scene_graph_node(graph_node)->children) {
+    glm::vec3 get_centre() {
+        glm::vec3 centre = glm::vec3(0.0f);
+        for (int &child : get_scene_graph_node(graph_node)->children) {
             centre += get_scene_graph_node(child)->translation;
         }
         centre /= get_scene_graph_node(graph_node)->children.size();
@@ -159,17 +153,17 @@ struct Frame {
     int y;
     Camera* cam;
 
-    auto init(Camera* camera) -> void {
+    void init(Camera* camera) {
         camera->init((float)width / (float)height);
         cam = camera;
     }
 };
 
-auto framebuffer_size_callback(GLFWwindow* window, int width, int height) -> void {
+void framebuffer_size_callback(GLFWwindow *window, int width, int height) {
     glViewport(0, 0, width, height);
 }
 
-auto init_window_and_gl(WindowDims* window_dims) -> GLFWwindow* {
+GLFWwindow *init_window_and_gl(WindowDims *window_dims) {
     glfwInit();
     glfwWindowHint(GLFW_CONTEXT_VERSION_MAJOR, 4);
     glfwWindowHint(GLFW_CONTEXT_VERSION_MINOR, 6);
@@ -193,19 +187,19 @@ auto init_window_and_gl(WindowDims* window_dims) -> GLFWwindow* {
     return window;
 }
 
-auto gl_update_viewport(WindowDims* window_dims, Frame* frame) -> void {
+void gl_update_viewport(WindowDims* window_dims, Frame* frame) {
     glViewport(frame->x, window_dims->height - frame->y - frame->height, frame->width, frame->height);
 }
 
-auto cube_mesh = Mesh{};
-auto wall_tex = Texture{};
-auto entities = std::vector<Entity>();
-auto scene_graph_nodes = std::vector<SceneGraphNode>();
+Mesh cube_mesh = {0};
+Texture wall_tex = {0};
+std::vector<Entity> entities = std::vector<Entity>();
+std::vector<SceneGraphNode> scene_graph_nodes = std::vector<SceneGraphNode>();
 
-auto process_input(GLFWwindow *window) -> void {
-    static auto wireframe = false;
-    static auto last_frame_state_press_enter = false;
-    static auto last_frame_state_press = false;
+void process_input(GLFWwindow *window) {
+    static bool wireframe = false;
+    static bool last_frame_state_press_enter = false;
+    static bool last_frame_state_press = false;
 
     if (glfwGetKey(window, GLFW_KEY_ESCAPE) == GLFW_PRESS) {
         glfwSetWindowShouldClose(window, true);
@@ -232,7 +226,7 @@ auto process_input(GLFWwindow *window) -> void {
 }
 
 
-auto new_entity() -> int {
+int new_entity() {
     entities.emplace_back();
     scene_graph_nodes.emplace_back();
     entities.back().scene_graph_node = scene_graph_nodes.size();
@@ -240,21 +234,21 @@ auto new_entity() -> int {
     return entities.size();
 }
 
-auto get_entity(int id) -> Entity* {
+Entity *get_entity(int id) {
     return &entities[id - 1];
 }
 
-auto get_scene_graph_node(int id) -> SceneGraphNode* {
+SceneGraphNode *get_scene_graph_node(int id) {
     return &scene_graph_nodes[id - 1];
 }
 
-auto new_graph_node() -> int {
+int new_graph_node() {
     scene_graph_nodes.emplace_back();
     return scene_graph_nodes.size();
 }
 
-auto draw_entity(Entity* entity) -> void {
-    auto modelUniformLoc = glGetUniformLocation(app_state.active_shader->prog_id, "model");
+void draw_entity(Entity *entity) {
+    GLint modelUniformLoc = glGetUniformLocation(app_state.active_shader->prog_id, "model");
     glUniformMatrix4fv(modelUniformLoc, 1, GL_FALSE, glm::value_ptr(get_scene_graph_node(entity->scene_graph_node)->world));
     glBindTexture(GL_TEXTURE_2D, entity->tex->tex_id);
     glBindVertexArray(entity->mesh->vao);
@@ -262,17 +256,17 @@ auto draw_entity(Entity* entity) -> void {
     //glDrawElements(GL_TRIANGLES, entity->mesh->num_indices, GL_UNSIGNED_INT, 0);
 }
 
-auto create_polycube_from_repr(Voxel::Space* repr) -> Polycube {
-    auto polycube_id = new_graph_node();
+Polycube create_polycube_from_repr(Space *repr) {
+    int polycube_id = new_graph_node();
     get_scene_graph_node(polycube_id)->init();
     for (int x = 0; x < repr->dim_x; x++) {
         for (int y = 0; y < repr->dim_y; y++) {
             for (int z = 0; z < repr->dim_z; z++) {
-                if (Voxel::filledAt(repr, x, y, z)) {
-                    auto polycube_segment = get_entity(new_entity());
+                if (filledAt(repr, x, y, z)) {
+                    Entity *polycube_segment = get_entity(new_entity());
                     polycube_segment->mesh=&cube_mesh, 
                     polycube_segment->tex=&wall_tex;
-                    auto graph_node = get_scene_graph_node(polycube_segment->scene_graph_node);
+                    SceneGraphNode *graph_node = get_scene_graph_node(polycube_segment->scene_graph_node);
                     graph_node->init();
                     graph_node->translation = glm::vec3(
                         -((repr->dim_z - 1)/2.0f) + z,
@@ -285,67 +279,67 @@ auto create_polycube_from_repr(Voxel::Space* repr) -> Polycube {
             }
         }
     }
-    auto result = Polycube{
+    Polycube result = {
         .graph_node=polycube_id,
         .color=glm::vec3(1.0f),
     };
     return result;
 }
 
-auto recalculate_scene_graph(SceneGraphNode* top) -> void {
+void recalculate_scene_graph(SceneGraphNode *top) {
     if (top->children.size() == 0) {
         return;
     }
-    for (auto &node_id : top->children) {
-        auto graph_node = get_scene_graph_node(node_id);
+    for (int &node_id : top->children) {
+        SceneGraphNode *graph_node = get_scene_graph_node(node_id);
         graph_node->update_local();
         graph_node->world = top->world * graph_node->local;
         recalculate_scene_graph(graph_node);
     }
 }
 
-auto main_cmd() -> int {
-    SomaSolve::interactive_cmd_line_solve_soma();
+int main_cmd() {
+    interactive_cmd_line_solve_soma();
     return 0;
 }
 
-auto main_gfx() -> int {
-    auto window_dims = WindowDims{ 800, 600 };
-    auto window = init_window_and_gl(&window_dims);
-    if (window == nullptr) {
+int main_gfx() {
+    WindowDims window_dims = { 800, 600 };
+    GLFWwindow *window = init_window_and_gl(&window_dims);
+    if (!window) {
         return -1;
     }
 
-    app_state = GlobalAppState{
+    app_state = {
         .current_polycube=0,
         .last_polycube_visible=6,
-        .active_shader=nullptr,
+        .active_shader=0,
         .polycubes={},
     };
 
-    auto phong_shader = Shader{};
+    Shader phong_shader = {0};
     phong_shader.init("../assets/shaders/phong-solid.vertex.glsl", "../assets/shaders/phong-solid.fragment.glsl");
     app_state.active_shader = &phong_shader;
 
     cube_mesh.init("../assets/models/c000000.obj");
     wall_tex.init("../assets/textures/brick-wall.jpg");
 
-    auto little_frame = Frame{ .width=80, .height=60, .x=20, .y=20 };
-    auto big_frame = Frame{ .width=800, .height=600, .x=0, .y=0 };
-    auto main_cam = Camera{};
-    auto other_cam = Camera{};
+    Frame little_frame = { .width=80, .height=60, .x=20, .y=20 };
+    Frame big_frame = { .width=800, .height=600, .x=0, .y=0 };
+    Camera main_cam = {};
+    Camera other_cam = {};
     little_frame.init(&other_cam);
     big_frame.init(&main_cam);
-    auto frames = std::vector{ &big_frame, &little_frame };
+    std::vector<Frame *> frames = { &big_frame, &little_frame }; // element type is Frame*: the initializers are the addresses of the two frames
 
-    auto root_node = SceneGraphNode{};
+    SceneGraphNode root_node = {};
     root_node.init();
 
-    for (int i = 0; i < SomaSolve::STD_SOMA.size(); i++) {
-        auto voxel_space = Voxel::Space{ SomaSolve::STD_SOMA[i], 3, 3, 3 };
-        Voxel::cullEmptySpace(&voxel_space);
-        auto polycube = create_polycube_from_repr(&voxel_space);
-        polycube.color = Color::color_from_index(i);
+    for (int i = 0; i < STD_SOMA.size(); i++) {
+        auto voxel_space = Space{ STD_SOMA[i], 3, 3, 3 };
+        cullEmptySpace(&voxel_space);
+        Polycube polycube = create_polycube_from_repr(&voxel_space);
+        polycube.color = color_from_index(i);
         app_state.polycubes.push_back(polycube);
         root_node.children.push_back(app_state.polycubes.back().graph_node);
     }
@@ -353,18 +347,18 @@ auto main_gfx() -> int {
     main_cam.pos = glm::vec3(4.0f, 4.0f, 4.0f);
     main_cam.look_at(0.0f, 0.0f, 0.0f);
 
-    auto light_pos = glm::vec3(6.0f);
+    glm::vec3 light_pos = glm::vec3(6.0f);
 
     glUseProgram(app_state.active_shader->prog_id);
-    auto view_loc = glGetUniformLocation(app_state.active_shader->prog_id, "view");
-    auto proj_loc = glGetUniformLocation(app_state.active_shader->prog_id, "projection");
-    auto light_pos_loc = glGetUniformLocation(app_state.active_shader->prog_id, "light_pos");
+    GLint view_loc = glGetUniformLocation(app_state.active_shader->prog_id, "view");
+    GLint proj_loc = glGetUniformLocation(app_state.active_shader->prog_id, "projection");
+    GLint light_pos_loc = glGetUniformLocation(app_state.active_shader->prog_id, "light_pos");
     glUniform3fv(light_pos_loc, 1, glm::value_ptr(light_pos));
     glUniformMatrix4fv(proj_loc, 1, GL_FALSE, glm::value_ptr(main_cam.proj));
     glUniformMatrix4fv(view_loc, 1, GL_FALSE, glm::value_ptr(main_cam.view));
 
-    auto last_frame = glfwGetTime();
-    auto time_delta = 1.0f/60.0f;
+    double last_frame = glfwGetTime(); // glfwGetTime() returns double; keep full precision so the per-frame delta stays accurate
+    real32 time_delta = 1.0f/60.0f;
     while (!glfwWindowShouldClose(window)) {
         time_delta = glfwGetTime() - last_frame;
         process_input(window);
@@ -379,16 +373,16 @@ auto main_gfx() -> int {
         glClear(GL_DEPTH_BUFFER_BIT | GL_COLOR_BUFFER_BIT);
 
         gl_update_viewport(&window_dims, &big_frame);
-        auto current_polycube = &app_state.polycubes[app_state.current_polycube];
+        Polycube *current_polycube = &app_state.polycubes[app_state.current_polycube];
         get_scene_graph_node(current_polycube->graph_node)->rotation = glm::quat(glm::vec3(0, glfwGetTime() / 2, 0));
 
         glBindVertexArray(cube_mesh.vao);
         //glBindTexture(GL_TEXTURE_2D, entity.tex->tex_id);
         recalculate_scene_graph(&root_node);
-        auto model_uniform_loc = glGetUniformLocation(app_state.active_shader->prog_id, "model");
-        auto solid_color_loc = glGetUniformLocation(app_state.active_shader->prog_id, "solid_color");
+        GLint model_uniform_loc = glGetUniformLocation(app_state.active_shader->prog_id, "model");
+        GLint solid_color_loc = glGetUniformLocation(app_state.active_shader->prog_id, "solid_color");
         glUniform3fv(solid_color_loc, 1, glm::value_ptr(current_polycube->color));
-        for (auto &entity : entities) {
+        for (Entity &entity : entities) {
             if (entity.visible) {
                 glUniformMatrix4fv(model_uniform_loc, 1, GL_FALSE, glm::value_ptr(get_scene_graph_node(entity.scene_graph_node)->world));
                 glDrawArrays(GL_TRIANGLES, 0, entity.mesh->num_indices);
@@ -404,7 +398,7 @@ auto main_gfx() -> int {
     return 0;
 }
 
-auto main() -> int {
+int main() {
     return main_cmd();
 }
 
diff --git a/src/main.zig b/src/main.zig
deleted file mode 100644
index 7214248..0000000
--- a/src/main.zig
+++ /dev/null
@@ -1,402 +0,0 @@
-const std = @import("std");
-const c = @import("c.zig");
-const zm = @import("zm");
-
-const Mesh = @import("gfx/Mesh.zig").Mesh;
-
-const ArrayList = std.ArrayList;
-
-fn print_mat(matrix: *const zm.Mat) void {
-    std.debug.print("{}, {}, {}, {}\n", .{ matrix[0][0], matrix[0][1], matrix[0][2], matrix[0][3] });
-    std.debug.print("{}, {}, {}, {}\n", .{ matrix[1][0], matrix[1][1], matrix[1][2], matrix[1][3] });
-    std.debug.print("{}, {}, {}, {}\n", .{ matrix[2][0], matrix[2][1], matrix[2][2], matrix[2][3] });
-    std.debug.print("{}, {}, {}, {}\n", .{ matrix[3][0], matrix[3][1], matrix[3][2], matrix[3][3] });
-}
-
-const Camera = struct {
-    view: zm.Mat = .{ zm.f32x4s(0.0), zm.f32x4s(0.0), zm.f32x4s(0.0), zm.f32x4s(0.0) },
-    proj: zm.Mat,
-    pos: zm.Vec = zm.f32x4s(0.0),
-    up: zm.Vec = zm.f32x4s(0.0),
-    target: zm.Vec,
-
-    pub fn init(self: Camera, aspect_ratio: f32) void {
-        self.proj = zm.perspectiveFovRh(std.math.degreesToRadians(45.0), aspect_ratio, 0.1, 100.0);
-    }
-
-    pub fn new(aspect_ratio: f32) Camera {
-        const cam = Camera{};
-        init(cam, aspect_ratio);
-        return cam;
-    }
-
-    pub fn look_at(self: Camera, x: f32, y: f32, z: f32) void {
-        self.target = zm.f32x4(x, y, z, 0.0);
-        self.view = zm.lookAtRh(self.pos, self.target, self.up);
-    }
-
-    pub fn set_up(self: Camera, up_x: f32, up_y: f32, up_z: f32) void {
-        self.up = zm.f32x4(up_x, up_y, up_z, 0.0);
-    }
-};
-
-const GlobalAppState = struct {
-    current_polycube: i32,
-    last_polycube_visible: i32,
-    active_shader: ?*Shader,
-    polycubes: ArrayList(Polycube),
-};
-
-const app_state: GlobalAppState = .{};
-
-const WindowDims = struct {
-    width: u32,
-    height: u32,
-};
-
-const Entity = struct {
-    mesh: *Mesh,
-    tex: *Texture,
-    visible: bool,
-    scene_graph_node: i32,
-};
-
-const SceneGraphNode = struct {
-    local: zm.Mat,
-    world: zm.Mat,
-    translation: zm.Vec,
-    rotation: zm.Quat,
-    scale: zm.Vec,
-    children: ArrayList(i32),
-    entity: ?i32,
-
-    pub fn reset(self: SceneGraphNode) void {
-        self.scale = zm.f32x4(1.0, 1.0, 1.0, 0.0);
-        self.translation = zm.f32x4s(0.0);
-        self.rotation = zm.f32x4s(0.0);
-    }
-
-    pub fn init(self: SceneGraphNode) void {
-        self.reset();
-        self.local = zm.identity();
-        self.world = self.local;
-    }
-
-    pub fn update_local(self: SceneGraphNode) void {
-        const scaling = zm.scaling(self.scale);
-        const translation = zm.translation(self.translation);
-        const rotation = zm.quatToMat(self.rotation);
-        self.local = zm.mul(zm.mul(translation, rotation), scaling);
-        self.local = scaling(
-            zm.translate(
-                zm.identity(),
-                self.translation
-            ) * toMat4(self.rotation),
-            self.scale
-        );
-    }
-};
-
-
-const Polycube = struct {
-    graph_node: i32,
-    color: zm.Vec,
-
-    pub fn show(self: Polycube) void {
-        const node = get_scene_graph_node(self.graph_node);
-        for (node.children.items) |child_id| {
-            const child_node = get_scene_graph_node(child_id);
-            if (child_node.entity) |entity_id| {
-                get_entity(entity_id).visible = true;
-            }
-        }
-    }
-
-    pub fn hide(self: Polycube) void {
-        const node = get_scene_graph_node(self.graph_node);
-        for (node.children.items) |child_id| {
-            const child_node = get_scene_graph_node(child_id);
-            if (child_node.entity) |entity_id| {
-                get_entity(entity_id).visible = false;
-            }
-        }
-    }
-
-    pub fn get_centre(self: Polycube) zm.Vec {
-        const centre = zm.Vec(0.0);
-        for (get_scene_graph_node(self.graph_node).children.items) |child_id| {
-            centre += get_scene_graph_node(child_id).translation;
-        }
-        centre /= get_scene_graph_node(self.graph_node).children.size();
-        return centre;
-    }
-};
-
-const Frame = struct {
-    width: i32,
-    height: i32,
-    x: i32,
-    y: i32,
-    cam: *Camera,
-
-    pub fn new(camera: *Camera, width: i32, height: i32) Frame {
-        const frame = Frame{};
-        camera.init(@as(f32, width) / @as(f32, height));
-        frame.cam = camera;
-        return frame;
-    }
-};
-
-fn framebuffer_size_callback(width: i32, height: i32) void {
-    c.glViewport(0, 0, width, height);
-}
-
-fn init_window_and_gl(window_dims: *WindowDims) ?*c.GLFWwindow {
-    c.glfwInit();
-    c.glfwWindowHint(c.GLFW_CONTEXT_VERSION_MAJOR, 4);
-    c.glfwWindowHint(c.GLFW_CONTEXT_VERSION_MINOR, 6);
-    c.glfwWindowHint(c.GLFW_OPENGL_PROFILE, c.GLFW_OPENGL_CORE_PROFILE);
-    const window = c.glfwCreateWindow(window_dims.width, window_dims.height, "Somaesque", c.NULL, c.NULL);
-    if (window == c.NULL) {
-        std.debug.print("Failed to create GLFW window");
-        c.glfwTerminate();
-        return null;
-    }
-    c.glfwMakeContextCurrent(window);
-
-    if (!c.gladLoadGLLoader(@as(c.GLADloadproc, c.glfwGetProcAddress))) {
-        std.debug.print("Failed to initialize GLAD");
-        return null;
-    }
-
-    c.glViewport(0, 0, 800, 600);
-    c.glfwSetFramebufferSizeCallback(window, framebuffer_size_callback);
-    c.glEnable(c.GL_DEPTH_TEST);
-    return window;
-}
-
-fn gl_update_viewport(window_dims: *WindowDims, frame: *Frame) void {
-    c.glViewport(frame.x, window_dims.height - frame.y - frame.height, frame.width, frame.height);
-}
-
-const cube_mesh = Mesh{};
-const wall_tex = Texture{};
-const entities = ArrayList(Entity);
-const scene_graph_nodes = ArrayList(SceneGraphNode);
-
-fn process_input(window: *c.GLFWwindow) void {
-    const static = struct {
-        wireframe: bool = false,
-        last_frame_state_press_enter: bool = false,
-        last_frame_state_press: bool = false,
-    };
-
-    if (c.glfwGetKey(window, c.GLFW_KEY_ESCAPE) == c.GLFW_PRESS) {
-        c.glfwSetWindowShouldClose(window, true);
-    }
-
-    if (c.glfwGetKey(window, c.GLFW_KEY_SPACE) == c.GLFW_PRESS and !static.last_frame_state_press) {
-        c.glPolygonMode(c.GL_FRONT_AND_BACK, if (!static.wireframe) c.GL_LINE else c.GL_FILL);
-        static.wireframe = !static.wireframe;
-        static.last_frame_state_press = true;
-    } else if (c.glfwGetKey(window, c.GLFW_KEY_SPACE) == c.GLFW_RELEASE) {
-        static.last_frame_state_press = false;
-    }
-
-    if (c.glfwGetKey(window, c.GLFW_KEY_ENTER) == c.GLFW_PRESS and !static.last_frame_state_press_enter) {
-        if (app_state.current_polycube == 6) {
-            app_state.current_polycube = 0;
-        } else {
-            app_state.current_polycube += 1;
-        }
-        static.last_frame_state_press_enter = true;
-    } else if (c.glfwGetKey(window, c.GLFW_KEY_ENTER) == c.GLFW_RELEASE) {
-        static.last_frame_state_press_enter = false;
-    }
-}
-
-
-fn new_entity() i32 {
-    entities.append(.{});
-    scene_graph_nodes.append(.{});
-    entities.items[entities.items.len - 1].scene_graph_node = scene_graph_nodes.items.len;
-    scene_graph_nodes.items[scene_graph_nodes.items.len - 1].entity = entities.items.len;
-    return entities.items.len;
-}
-
-fn get_entity(id: i32) ?*Entity {
-    if (entities.items[id - 1]) {
-        return &entities.items[id - 1];
-    }
-    return null;
-}
-
-fn get_scene_graph_node(id: i32) *SceneGraphNode {
-    if (scene_graph_nodes.items[id - 1]) {
-        return &scene_graph_nodes.items[id - 1];
-    }
-    return null;
-}
-
-fn new_graph_node() i32 {
-    scene_graph_nodes.append(.{});
-    return scene_graph_nodes.items.len;
-}
-
-fn draw_entity(entity: *Entity) void {
-    const modelUniformLoc = c.glGetUniformLocation(app_state.active_shader.prog_id, "model");
-    c.glUniformMatrix4fv(modelUniformLoc, 1, c.GL_FALSE, &get_scene_graph_node(entity.scene_graph_node).world);
-    c.glBindTexture(c.GL_TEXTURE_2D, entity.tex.tex_id);
-    c.glBindVertexArray(entity.mesh.vao);
-    c.glDrawArrays(c.GL_TRIANGLES, 0, entity.mesh.num_indices);
-    //c.glDrawElements(c.GL_TRIANGLES, entity.mesh.num_indices, c.GL_UNSIGNED_INT, 0);
-}
-
-fn create_polycube_from_repr(repr: *Voxel.Space) Polycube {
-    const polycube_id = new_graph_node();
-    get_scene_graph_node(polycube_id).init();
-    var x: usize = 1;
-    var y: usize = 1;
-    var z: usize = 1;
-    while (x < repr.dim_x) : (x += 1) {
-        while (y < repr.dim_y) : (y += 1) {
-            while (z < repr.dim_z) : (z += 1) {
-                if (Voxel.filledAt(repr, x, y, z)) {
-                    const polycube_segment = get_entity(new_entity());
-                    polycube_segment.mesh = &cube_mesh;
-                    polycube_segment.tex = &wall_tex;
-                    const graph_node = get_scene_graph_node(polycube_segment.scene_graph_node);
-                    graph_node.init();
-                    graph_node.translation = zm.f32x4(
-                        -((repr.dim_z - 1)/2.0) + z,
-                        ((repr.dim_x - 1)/2.0) - x,
-                        -((repr.dim_y - 1)/2.0) + y,
-                        0.0,
-                    );
-                    graph_node.update_local();
-                    get_scene_graph_node(polycube_id).children.append(polycube_segment.scene_graph_node);
-                }
-            }
-        }
-    }
-    const result = Polycube{
-        .graph_node = polycube_id,
-        .color = zm.f32x4s(1.0),
-    };
-    return result;
-}
-
-fn recalculate_scene_graph(top: *SceneGraphNode) void {
-    if (top.children.size() == 0) {
-        return;
-    }
-    for (top.children.items) |child_id| {
-        const graph_node = get_scene_graph_node(child_id);
-        graph_node.update_local();
-        graph_node.world = zm.mul(top.world, graph_node.local);
-        recalculate_scene_graph(graph_node);
-    }
-}
-
-pub fn main() void {
-    const window_dims = WindowDims{ 800, 600 };
-    const window = init_window_and_gl(&window_dims);
-    if (window == null) {
-        return -1;
-    }
-
-    app_state = GlobalAppState{
-        .current_polycube=0,
-        .last_polycube_visible=6,
-        .active_shader=null,
-        .polycubes={},
-    };
-
-    const phong_shader = Shader{};
-    phong_shader.init("../assets/shaders/phong-solid.vertex.glsl", "../assets/shaders/phong-solid.fragment.glsl");
-    app_state.active_shader = &phong_shader;
-
-    cube_mesh.init("../assets/models/c000000.obj");
-    wall_tex.init("../assets/textures/brick-wall.jpg");
-
-    const little_frame = Frame{ .width=80, .height=60, .x=20, .y=20 };
-    const big_frame = Frame{ .width=800, .height=600, .x=0, .y=0 };
-    const main_cam = Camera{};
-    const other_cam = Camera{};
-    little_frame.init(&other_cam);
-    big_frame.init(&main_cam);
-    const frames = [_]*Frame{ &big_frame, &little_frame };
-
-    const root_node = SceneGraphNode{};
-    root_node.init();
-
-    var i: usize = 0;
-    while (i < SomaSolve.STD_SOMA.items.len) : (i += 1) {
-        const voxel_space = voxel.Space{ SomaSolve.STD_SOMA[i], 3, 3, 3 };
-        voxel.cullEmptySpace(&voxel_space);
-        const polycube = create_polycube_from_repr(&voxel_space);
-        polycube.color = color.color_from_index(i);
-        app_state.polycubes.append(polycube);
-        root_node.children.append(app_state.polycubes.items[app_state.polycubes.items.len - 1].graph_node);
-    }
-
-    main_cam.pos = zm.f32x4(4.0, 4.0, 4.0, 0.0);
-    main_cam.look_at(0.0, 0.0, 0.0);
-
-    const light_pos = zm.f32x4(6.0, 6.0, 6.0, 0.0);
-
-    c.glUseProgram(app_state.active_shader.prog_id);
-    const view_loc = c.glGetUniformLocation(app_state.active_shader.prog_id, "view");
-    const proj_loc = c.glGetUniformLocation(app_state.active_shader.prog_id, "projection");
-    const light_pos_loc = c.glGetUniformLocation(app_state.active_shader.prog_id, "light_pos");
-    c.glUniform3fv(light_pos_loc, 1, &light_pos);
-    c.glUniformMatrix4fv(proj_loc, 1, GL_FALSE, &main_cam.proj);
-    c.glUniformMatrix4fv(view_loc, 1, GL_FALSE, &main_cam.view);
-
-    var last_frame = c.glfwGetTime();
-    var time_delta = 1.0/60.0;
-    while (!c.glfwWindowShouldClose(window)) {
-        time_delta = c.glfwGetTime() - last_frame;
-        process_input(window);
-
-        if (app_state.last_polycube_visible != app_state.current_polycube) {
-            app_state.polycubes[app_state.last_polycube_visible].hide();
-            app_state.polycubes[app_state.current_polycube].show();
-            app_state.last_polycube_visible = app_state.current_polycube;
-        }
-
-        c.glClearColor(0.0, 0.0, 0.0, 1.0);
-        c.glClear(c.GL_DEPTH_BUFFER_BIT | c.GL_COLOR_BUFFER_BIT);
-
-        c.gl_update_viewport(&window_dims, &big_frame);
-        const current_polycube = &app_state.polycubes[app_state.current_polycube];
-        c.get_scene_graph_node(current_polycube.graph_node).rotation = zm.quatFromRollPitchYaw(0.0, c.glfwGetTime() / 2.0, 0.0);
-
-        c.glBindVertexArray(cube_mesh.vao);
-        //glBindTexture(GL_TEXTURE_2D, entity.tex->tex_id);
-        recalculate_scene_graph(&root_node);
-        const model_uniform_loc = c.glGetUniformLocation(app_state.active_shader.prog_id, "model");
-        const solid_color_loc = c.glGetUniformLocation(app_state.active_shader.prog_id, "solid_color");
-        c.glUniform3fv(solid_color_loc, 1, &current_polycube.color);
-        while (entities.items) |entity| {
-            if (entity.visible) {
-                c.glUniformMatrix4fv(model_uniform_loc, 1, c.GL_FALSE, &get_scene_graph_node(entity.scene_graph_node).world);
-                c.glDrawArrays(c.GL_TRIANGLES, 0, entity.mesh.num_indices);
-                //glDrawElements(GL_TRIANGLES, entity->mesh->num_indices, GL_UNSIGNED_INT, 0);
-            }
-        }
-
-        c.glfwSwapBuffers(window);
-        c.glfwPollEvents();
-    }
-
-    c.glfwTerminate();
-    return 0;
-}
-
-//test "simple test" {
-//    var list = std.ArrayList(i32).init(std.testing.allocator);
-//    defer list.deinit(); // try commenting this out and see if zig detects the memory leak!
-//    try list.append(42);
-//    try std.testing.expectEqual(@as(i32, 42), list.pop());
-//}