diff --git a/.gitignore b/.gitignore
index 0067a97..87385e5 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,4 @@
-/build
-/debug
 .cache
 .vscode
+/zig-cache
+/zig-out
diff --git a/build.zig b/build.zig
new file mode 100644
index 0000000..ba9db36
--- /dev/null
+++ b/build.zig
@@ -0,0 +1,55 @@
+const std = @import("std");
+const zmath = @import("lib/zmath/build.zig");
+
+pub fn build(b: *std.Build) void {
+    // Standard target options allow the person running `zig build` to choose
+    // what target to build for. Here we do not override the defaults, which
+    // means any target is allowed, and the default is native. Other options
+    // for restricting the supported target set are available.
+
+    const target = b.standardTargetOptions(.{});
+
+    // Standard optimization options allow the person running `zig build` to
+    // select between Debug, ReleaseSafe, ReleaseFast, and ReleaseSmall.
+    const mode = b.standardOptimizeOption(.{});
+
+    const exe = b.addExecutable(.{
+        .name = "somaesque-native-zig",
+        .root_source_file = .{ .path = "src/main.zig" },
+        .target = target,
+        .optimize = mode,
+    });
+    exe.addIncludePath("/usr/local/include"); // NOTE(review): absolute host path; presumably where the system GLFW/GLM headers live - confirm
+    // C side: libc, three system libraries, and the vendored headers under lib/c.
+    exe.linkLibC();
+    exe.linkSystemLibrary("glfw3");
+    exe.linkSystemLibrary("glm"); // NOTE(review): GLM is usually header-only - confirm this resolves on the target system
+    exe.linkSystemLibrary("GL");
+    exe.addIncludePath("lib/c");
+
+    exe.addCSourceFile("lib/c/glad/glad.c", &[_][]const u8{"-std=c11"}); // glad is compiled from source with -std=c11
+
+    exe.install();
+
+    // zmath, taken from lib/zmath/build.zig, with cross-platform determinism enabled
+    const zmath_pkg = zmath.package(b, target, mode, .{
+        .options = .{ .enable_cross_platform_determinism = true },
+    });
+    zmath_pkg.link(exe);
+    // `zig build run`: depends on the install step, then runs the executable with any extra arguments.
+    const run_cmd = exe.run();
+    run_cmd.step.dependOn(b.getInstallStep());
+    if (b.args) |args| {
+        run_cmd.addArgs(args);
+    }
+
+    const run_step = b.step("run", "Run the app");
+    run_step.dependOn(&run_cmd.step);
+    // The unit-test step is disabled. NOTE(review): the calls below look like the older builder API - confirm before re-enabling.
+    //const exe_tests = b.addTest("src/main.zig");
+    //exe_tests.setTarget(target);
+    //exe_tests.setBuildMode(mode);
+
+    //const test_step = b.step("test", "Run unit tests");
+    //test_step.dependOn(&exe_tests.step);
+}
diff --git a/vendor/KHR/khrplatform.h b/lib/c/KHR/khrplatform.h
similarity index 100%
rename from vendor/KHR/khrplatform.h
rename to lib/c/KHR/khrplatform.h
diff --git a/vendor/glad/glad.c b/lib/c/glad/glad.c
similarity index 100%
rename from vendor/glad/glad.c
rename to lib/c/glad/glad.c
diff --git a/vendor/glad/glad.h b/lib/c/glad/glad.h
similarity index 100%
rename from vendor/glad/glad.h
rename to lib/c/glad/glad.h
diff --git a/vendor/loaders/stb_image.h b/lib/c/loaders/stb_image.h
similarity index 100%
rename from vendor/loaders/stb_image.h
rename to lib/c/loaders/stb_image.h
diff --git a/lib/c/loaders/tinyobj.h b/lib/c/loaders/tinyobj.h
new file mode 100644
index 0000000..cbfa301
--- /dev/null
+++ b/lib/c/loaders/tinyobj.h
@@ -0,0 +1,1739 @@
+/*
+   The MIT License (MIT)
+
+   Copyright (c) 2016 - 2019 Syoyo Fujita and many contributors.
+
+   Permission is hereby granted, free of charge, to any person obtaining a copy
+   of this software and associated documentation files (the "Software"), to deal
+   in the Software without restriction, including without limitation the rights
+   to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+   copies of the Software, and to permit persons to whom the Software is
+   furnished to do so, subject to the following conditions:
+
+   The above copyright notice and this permission notice shall be included in
+   all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+   AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+   THE SOFTWARE.
+   */
+#ifndef TINOBJ_LOADER_C_H_
+#define TINOBJ_LOADER_C_H_
+
+/* @todo { Remove stddef dependency. size_t? } */
+#include <stddef.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct {
+  char *name; /* name given by the `newmtl` statement */
+
+  float ambient[3];       /* Ka */
+  float diffuse[3];       /* Kd */
+  float specular[3];      /* Ks */
+  float transmittance[3]; /* Kt */
+  float emission[3];      /* Ke */
+  float shininess;        /* Ns */
+  float ior;      /* index of refraction (Ni) */
+  float dissolve; /* 1 == opaque; 0 == fully transparent (d, or 1 - Tr) */
+  /* illumination model (see http://www.fileformat.info/format/material/) */
+  int illum;
+
+  int pad0; /* NOTE(review): presumably explicit padding; initMaterial leaves it unset */
+
+  char *ambient_texname;            /* map_Ka */
+  char *diffuse_texname;            /* map_Kd */
+  char *specular_texname;           /* map_Ks */
+  char *specular_highlight_texname; /* map_Ns */
+  char *bump_texname;               /* map_bump, bump */
+  char *displacement_texname;       /* disp */
+  char *alpha_texname;              /* map_d */
+} tinyobj_material_t;
+
+typedef struct {
+  char *name; /* group name or object name. */
+  unsigned int face_offset; /* presumably the index of the shape's first face - TODO confirm */
+  unsigned int length;      /* presumably the number of faces in the shape - TODO confirm */
+} tinyobj_shape_t;
+
+typedef struct { int v_idx, vt_idx, vn_idx; } tinyobj_vertex_index_t; /* the i, j and k of an `i/j/k` face element, in that order */
+
+typedef struct {
+  unsigned int num_vertices; /* the num_* fields are presumably the element counts of the arrays below - TODO confirm units */
+  unsigned int num_normals;
+  unsigned int num_texcoords;
+  unsigned int num_faces;
+  unsigned int num_face_num_verts;
+
+  int pad0; /* NOTE(review): presumably explicit padding - confirm */
+
+  float *vertices;
+  float *normals;
+  float *texcoords;
+  tinyobj_vertex_index_t *faces; /* one v/vt/vn index triple per entry */
+  int *face_num_verts;           /* presumably the vertex count of each face - TODO confirm */
+  int *material_ids;             /* presumably one material index per face - TODO confirm */
+} tinyobj_attrib_t;
+
+
+#define TINYOBJ_FLAG_TRIANGULATE (1 << 0)
+
+#define TINYOBJ_INVALID_INDEX (0x80000000)
+
+#define TINYOBJ_SUCCESS (0)
+#define TINYOBJ_ERROR_EMPTY (-1)
+#define TINYOBJ_ERROR_INVALID_PARAMETER (-2)
+#define TINYOBJ_ERROR_FILE_OPERATION (-3)
+
+/* Provide a callback that reads a text file without any parsing or modification.
+ * The obj and mtl parsers use it to read all the data they need:
+ * tinyobj_parse_obj
+ * tinyobj_parse_mtl_file
+ *
+ * @param[in] ctx User-provided context.
+ * @param[in] filename Name of the file to load.
+ * @param[in] is_mtl 1 when the callback is invoked to load a .mtl file, 0 for a .obj file.
+ * @param[in] obj_filename .obj filename. Useful when the .mtl is loaded from the same location as the .obj. When the callback is called to load the .obj, `filename` and `obj_filename` are the same.
+ * @param[out] buf Content of the loaded file.
+ * @param[out] len Size of the content (file) in bytes.
+ */
+typedef void (*file_reader_callback)(void *ctx, const char *filename, int is_mtl, const char *obj_filename, char **buf, size_t *len);
+
+/* Parse wavefront .obj
+ * @param[out] attrib Attributes
+ * @param[out] shapes Array of parsed shapes
+ * @param[out] num_shapes Array length of `shapes`
+ * @param[out] materials Array of parsed materials
+ * @param[out] num_materials Array length of `materials`
+ * @param[in] file_name File name of .obj
+ * @param[in] file_reader File reader callback function (to read .obj and .mtl).
+ * @param[in] ctx Context pointer passed to the file_reader_callback.
+ * @param[in] flags Combination of TINYOBJ_FLAG_***
+ *
+ * Returns TINYOBJ_SUCCESS if everything goes well.
+ * Returns TINYOBJ_ERROR_*** when there is an error.
+ */
+extern int tinyobj_parse_obj(tinyobj_attrib_t *attrib, tinyobj_shape_t **shapes,
+                             size_t *num_shapes, tinyobj_material_t **materials,
+                             size_t *num_materials, const char *file_name, file_reader_callback file_reader,
+                             void *ctx, unsigned int flags);
+
+/* Parse wavefront .mtl
+ *
+ * @param[out] materials_out Array of parsed materials
+ * @param[out] num_materials_out Array length of `materials_out`
+ * @param[in] filename .mtl filename
+ * @param[in] obj_filename .obj filename. Could be NULL if you just want to parse a .mtl file.
+ * @param[in] file_reader File reader callback
+ * @param[in] ctx Context pointer passed to the file_reader callback.
+
+ * Returns TINYOBJ_SUCCESS if everything goes well.
+ * Returns TINYOBJ_ERROR_*** when there is an error.
+ */
+extern int tinyobj_parse_mtl_file(tinyobj_material_t **materials_out,
+                                  size_t *num_materials_out,
+                                  const char *filename, const char *obj_filename, file_reader_callback file_reader,
+				  void *ctx);
+
+extern void tinyobj_attrib_init(tinyobj_attrib_t *attrib);
+extern void tinyobj_attrib_free(tinyobj_attrib_t *attrib);
+extern void tinyobj_shapes_free(tinyobj_shape_t *shapes, size_t num_shapes);
+extern void tinyobj_materials_free(tinyobj_material_t *materials,
+                                   size_t num_materials);
+
+#ifdef __cplusplus
+}
+#endif
+
+#ifdef TINYOBJ_LOADER_C_IMPLEMENTATION
+#include <stdio.h>
+#include <assert.h>
+#include <string.h>
+#include <errno.h>
+
+#if defined(TINYOBJ_MALLOC) && defined(TINYOBJ_CALLOC) && defined(TINYOBJ_FREE) && (defined(TINYOBJ_REALLOC) || defined(TINYOBJ_REALLOC_SIZED))
+/* ok */
+#elif !defined(TINYOBJ_MALLOC) && !defined(TINYOBJ_CALLOC) && !defined(TINYOBJ_FREE) && !defined(TINYOBJ_REALLOC) && !defined(TINYOBJ_REALLOC_SIZED)
+/* ok */
+#else
+#error "Must define all or none of TINYOBJ_MALLOC, TINYOBJ_CALLOC, TINYOBJ_FREE, and TINYOBJ_REALLOC (or TINYOBJ_REALLOC_SIZED)."
+#endif
+
+#ifndef TINYOBJ_MALLOC
+#include <stdlib.h>
+#define TINYOBJ_MALLOC malloc
+#define TINYOBJ_REALLOC realloc
+#define TINYOBJ_CALLOC calloc
+#define TINYOBJ_FREE free
+#endif
+
+#ifndef TINYOBJ_REALLOC_SIZED
+#define TINYOBJ_REALLOC_SIZED(p,oldsz,newsz) TINYOBJ_REALLOC(p,newsz)
+#endif
+
+#define TINYOBJ_MAX_FACES_PER_F_LINE (16)
+#define TINYOBJ_MAX_FILEPATH (8192)
+
+#define IS_SPACE(x) (((x) == ' ') || ((x) == '\t'))
+#define IS_DIGIT(x) ((unsigned int)((x) - '0') < (unsigned int)(10))
+#define IS_NEW_LINE(x) (((x) == '\r') || ((x) == '\n') || ((x) == '\0'))
+
+static void skip_space(const char **token) {
+  const char *cur = *token; /* local cursor over the input */
+  while (*cur == ' ' || *cur == '\t') cur++;
+  *token = cur; /* first character that is neither a space nor a tab */
+}
+
+static void skip_space_and_cr(const char **token) {
+  const char *cur = *token; /* local cursor over the input */
+  while (*cur == ' ' || *cur == '\t' || *cur == '\r') cur++;
+  *token = cur; /* first character that is not a space, tab or carriage return */
+}
+
+static int until_space(const char *token) {
+  int n = 0; /* characters before the first NUL, space, tab or CR */
+  while (!(token[n] == '\0' || token[n] == ' ' || token[n] == '\t' ||
+           token[n] == '\r')) {
+    n++;
+  }
+  return n;
+}
+
+static size_t length_until_newline(const char *token, size_t n) {
+  size_t len = 0;
+  /* Length of the first line in token: stops at '\n', or at a '\r' that is not followed by '\n'. */
+  /* Assume token[n-1] = '\0'. `len + 1 < n` avoids the size_t wrap of `n - 1` when n == 0. */
+  for (len = 0; len + 1 < n; len++) {
+    if (token[len] == '\n') {
+      break;
+    }
+    if ((token[len] == '\r') && ((len + 2 < n) && (token[len + 1] != '\n'))) {
+      break; /* lone '\r'; the '\r' of a "\r\n" pair is counted and the '\n' ends the line */
+    }
+  }
+
+  return len; /* at most n - 1, and 0 when n == 0 */
+}
+
+static size_t length_until_line_feed(const char *token, size_t n) {
+  size_t len = 0;
+  /* Number of leading bytes of token, at most n, before the first '\n', '\r' or '\0'. */
+  /* Stopping at '\0' matters because a caller may pass n larger than the string stored in token. */
+  for (len = 0; len < n; len++) {
+    if ((token[len] == '\n') || (token[len] == '\r') || (token[len] == '\0')) {
+      break;
+    }
+  }
+
+  return len;
+}
+
+/* http://stackoverflow.com/questions/5710091/how-does-atoi-function-in-c-work
+*/
+static int my_atoi(const char *c) {
+  int negative = 0; /* set when a leading '-' is present */
+  int acc = 0;      /* magnitude accumulated so far */
+  if (*c == '-') {
+    negative = 1;
+    c++;
+  } else if (*c == '+') {
+    c++;
+  }
+  for (; *c >= '0' && *c <= '9'; c++) { /* consume the run of decimal digits */
+    acc = acc * 10 + (int)(*c - '0');
+  }
+  return negative ? -acc : acc;
+}
+
+/* Make index zero-base, and also support relative index. */
+static int fixIndex(int idx, size_t n) {
+  if (idx < 0) return (int)n + idx; /* relative: counts back from n */
+  if (idx > 0) return idx - 1;      /* 1-based to 0-based */
+  return 0;                         /* idx == 0 maps to 0 */
+}
+
+/* Parse raw triples: i, i/j/k, i//k, i/j */
+static tinyobj_vertex_index_t parseRawTriple(const char **token) {
+  tinyobj_vertex_index_t vi; /* result; *token is advanced past the characters consumed */
+  /* 0x80000000 = -2147483648 = invalid; vt/vn keep this marker when absent from the text */
+  vi.v_idx = (int)(0x80000000);
+  vi.vn_idx = (int)(0x80000000);
+  vi.vt_idx = (int)(0x80000000);
+  /* Leading index `i`, then advance to the next '/', space, tab, CR or NUL. */
+  vi.v_idx = my_atoi((*token));
+  while ((*token)[0] != '\0' && (*token)[0] != '/' && (*token)[0] != ' ' &&
+         (*token)[0] != '\t' && (*token)[0] != '\r') {
+    (*token)++;
+  }
+  if ((*token)[0] != '/') {
+    return vi; /* form `i` */
+  }
+  (*token)++; /* skip the first '/' */
+
+  /* i//k */
+  if ((*token)[0] == '/') {
+    (*token)++;
+    vi.vn_idx = my_atoi((*token));
+    while ((*token)[0] != '\0' && (*token)[0] != '/' && (*token)[0] != ' ' &&
+           (*token)[0] != '\t' && (*token)[0] != '\r') {
+      (*token)++;
+    }
+    return vi;
+  }
+
+  /* i/j/k or i/j */
+  vi.vt_idx = my_atoi((*token));
+  while ((*token)[0] != '\0' && (*token)[0] != '/' && (*token)[0] != ' ' &&
+         (*token)[0] != '\t' && (*token)[0] != '\r') {
+    (*token)++;
+  }
+  if ((*token)[0] != '/') {
+    return vi; /* form `i/j` */
+  }
+
+  /* i/j/k */
+  (*token)++; /* skip '/' */
+  vi.vn_idx = my_atoi((*token));
+  while ((*token)[0] != '\0' && (*token)[0] != '/' && (*token)[0] != ' ' &&
+         (*token)[0] != '\t' && (*token)[0] != '\r') {
+    (*token)++;
+  }
+  return vi; /* indices are returned as written in the text; no zero-basing happens here */
+}
+
+static int parseInt(const char **token) { /* parse one blank-delimited integer field and advance *token past it */
+  int i = 0;
+  skip_space(token);
+  i = my_atoi((*token));
+  (*token) += until_space((*token)); /* skip the whole field, including characters my_atoi did not consume */
+  return i;
+}
+
+/*
+ * Tries to parse a floating point number located at s.
+ *
+ * s_end should be a location in the string where reading should absolutely
+ * stop. For example at the end of the string, to prevent buffer overflows.
+ *
+ * Parses the following EBNF grammar:
+ *   sign    = "+" | "-" ;
+ *   END     = ? anything not in digit ?
+ *   digit   = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" ;
+ *   integer = [sign] , digit , {digit} ;
+ *   decimal = integer , ["." , integer] ;
+ *   float   = ( decimal , END ) | ( decimal , ("E" | "e") , integer , END ) ;
+ *
+ *  Valid strings are for example:
+ *   -0  +3.1417e+2  -0.0E-3  1.0324  -1.41   11e2
+ *
+ * If the parsing is a success, result is set to the parsed value and true
+ * is returned.
+ *
+ * The function is greedy and will parse until any of the following happens:
+ *  - a non-conforming character is encountered.
+ *  - s_end is reached.
+ *
+ * The following situations triggers a failure:
+ *  - s >= s_end.
+ *  - parse failure.
+ */
+static int tryParseDouble(const char *s, const char *s_end, double *result) {
+  double mantissa = 0.0; /* value of the digits read so far, without sign or exponent */
+  /* This exponent is base 2 rather than 10.
+   * However the exponent we parse is supposed to be one of ten,
+   * thus we must take care to convert the exponent/and or the
+   * mantissa to a * 2^E, where a is the mantissa and E is the
+   * exponent.
+   * To get the final double we will use ldexp, it requires the
+   * exponent to be in base 2.
+   */
+  int exponent = 0;
+
+  /* NOTE: THESE MUST BE DECLARED HERE SINCE WE ARE NOT ALLOWED
+   * TO JUMP OVER DEFINITIONS.
+   */
+  char sign = '+';
+  char exp_sign = '+';
+  char const *curr = s;
+
+  /* How many characters were read in a loop. */
+  int read = 0;
+  /* Tells whether a loop terminated due to reaching s_end. */
+  int end_not_reached = 0;
+
+  /*
+     BEGIN PARSING.
+     */
+
+  if (s >= s_end) {
+    return 0; /* fail */
+  }
+
+  /* Find out what sign we've got. */
+  if (*curr == '+' || *curr == '-') {
+    sign = *curr;
+    curr++;
+  } else if (IS_DIGIT(*curr)) { /* Pass through. */
+  } else {
+    goto fail;
+  }
+
+  /* Read the integer part. */
+  end_not_reached = (curr != s_end);
+  while (end_not_reached && IS_DIGIT(*curr)) {
+    mantissa *= 10;
+    mantissa += (int)(*curr - 0x30);
+    curr++;
+    read++;
+    end_not_reached = (curr != s_end);
+  }
+
+  /* We must make sure we actually got something. */
+  if (read == 0) goto fail;
+  /* We allow numbers of form "#", "###" etc. */
+  if (!end_not_reached) goto assemble;
+
+  /* Read the decimal part. */
+  if (*curr == '.') {
+    curr++;
+    read = 1;
+    end_not_reached = (curr != s_end);
+    while (end_not_reached && IS_DIGIT(*curr)) {
+      /* pow(10.0, -read) */
+      double frac_value = 1.0;
+      int f;
+      for (f = 0; f < read; f++) {
+        frac_value *= 0.1;
+      }
+      mantissa += (int)(*curr - 0x30) * frac_value;
+      read++;
+      curr++;
+      end_not_reached = (curr != s_end);
+    }
+  } else if (*curr == 'e' || *curr == 'E') {
+  } else {
+    goto assemble;
+  }
+
+  if (!end_not_reached) goto assemble;
+
+  /* Read the exponent part. */
+  if (*curr == 'e' || *curr == 'E') {
+    curr++;
+    /* Figure out if a sign is present and if it is. */
+    end_not_reached = (curr != s_end);
+    if (end_not_reached && (*curr == '+' || *curr == '-')) {
+      exp_sign = *curr;
+      curr++;
+    } else if (end_not_reached && IS_DIGIT(*curr)) { /* Pass through; never read *curr at s_end. */
+    } else {
+      /* Empty E is not allowed. */
+      goto fail;
+    }
+
+    read = 0;
+    end_not_reached = (curr != s_end);
+    while (end_not_reached && IS_DIGIT(*curr)) {
+      exponent *= 10;
+      exponent += (int)(*curr - 0x30);
+      curr++;
+      read++;
+      end_not_reached = (curr != s_end);
+    }
+    if (read == 0) goto fail;
+  }
+
+assemble :
+
+  {
+    double a = 1.0; /* = pow(5.0, exponent); */
+    double b  = 1.0; /* = 2.0^exponent */
+    int i;
+    for (i = 0; i < exponent; i++) {
+      a = a * 5.0;
+    }
+
+    for (i = 0; i < exponent; i++) {
+      b = b * 2.0;
+    }
+
+    if (exp_sign == '-') {
+      a = 1.0 / a;
+      b = 1.0 / b;
+    }
+
+    *result =
+      /* (sign == '+' ? 1 : -1) * ldexp(mantissa * pow(5.0, exponent),
+         exponent); */
+      (sign == '+' ? 1 : -1) * (mantissa * a * b);
+  }
+
+  return 1; /* success: *result has been written */
+fail:
+  return 0; /* failure: *result is left untouched */
+}
+
+static float parseFloat(const char **token) {
+  const char *begin;
+  const char *stop;
+  double parsed = 0.0; /* stays 0.0 when the field is not a valid number */
+
+  skip_space(token);
+  begin = *token;
+  stop = begin + until_space(begin);
+  (void)tryParseDouble(begin, stop, &parsed); /* a failed parse leaves `parsed` untouched */
+  *token = stop; /* the whole field is consumed either way */
+  return (float)parsed;
+}
+
+static void parseFloat2(float *x, float *y, const char **token) {
+  *x = parseFloat(token); /* first field */
+  *y = parseFloat(token); /* second field, read from where the first one ended */
+}
+
+static void parseFloat3(float *x, float *y, float *z, const char **token) {
+  *x = parseFloat(token); /* the three fields are read left to right, */
+  *y = parseFloat(token); /* each call continuing where the previous */
+  *z = parseFloat(token); /* one left *token */
+}
+
+static size_t my_strnlen(const char *s, size_t n) {
+    const char *nul = (const char *)memchr(s, '\0', n); /* first NUL within the first n bytes, if any */
+    return (nul == NULL) ? n : (size_t)(nul - s);
+}
+
+static char *my_strdup(const char *s, size_t max_length) { /* heap copy of s up to its line ending, at most max_length bytes */
+  char *d;
+  size_t len;
+
+  if (s == NULL) return NULL;
+
+  /* Do not consider CRLF line ending(#19) */
+  len = length_until_line_feed(s, max_length);
+
+  /* trim line ending and append '\0' */
+  d = (char *)TINYOBJ_MALLOC(len + 1); /* + '\0' */
+  if (d == NULL) return NULL; /* allocation failed; same handling as my_strndup */
+  memcpy(d, s, (size_t)(len));
+  d[len] = '\0';
+
+  return d;
+}
+
+static char *my_strndup(const char *s, size_t len) { /* heap copy of at most len bytes of s, NUL-terminated */
+  char *d;
+  size_t slen;
+
+  if (s == NULL) return NULL;
+  if (len == 0) return NULL; /* a zero-length request yields NULL, not an empty string */
+
+  slen = my_strnlen(s, len); /* stop early at a NUL inside the first len bytes */
+  d = (char *)TINYOBJ_MALLOC(slen + 1); /* + '\0' */
+  if (!d) {
+    return NULL; /* allocation failed */
+  }
+  memcpy(d, s, slen);
+  d[slen] = '\0';
+
+  return d;
+}
+
+char *dynamic_fgets(char **buf, size_t *size, FILE *file) {
+  char *offset;
+  char *ret;
+  size_t old_size;
+  /* Read one line into *buf, doubling *buf and *size until a '\n' has been read or fgets fails. */
+  if (!(ret = fgets(*buf, (int)*size, file))) {
+    return ret; /* fgets returned NULL: nothing was read */
+  }
+
+  if (NULL != strchr(*buf, '\n')) {
+    return ret; /* the whole line fit in the current buffer */
+  }
+
+  do {
+    old_size = *size;
+    *size *= 2;
+    *buf = (char*)TINYOBJ_REALLOC_SIZED(*buf, old_size, *size); /* NOTE(review): the result is not checked for NULL */
+    offset = &((*buf)[old_size - 1]); /* slot of the terminator when the previous read filled the old buffer */
+    /* Continue reading from that slot; old_size + 1 bytes are available from offset. */
+    ret = fgets(offset, (int)(old_size + 1), file);
+  } while(ret && (NULL == strchr(*buf, '\n')));
+
+  return ret; /* NOTE(review): this is `offset` or NULL, not the start of the line - confirm callers read *buf */
+}
+
+static void initMaterial(tinyobj_material_t *material) { /* reset every field except pad0 to its default */
+  int i;
+  material->name = NULL; /* all string members start out unset */
+  material->ambient_texname = NULL;
+  material->diffuse_texname = NULL;
+  material->specular_texname = NULL;
+  material->specular_highlight_texname = NULL;
+  material->bump_texname = NULL;
+  material->displacement_texname = NULL;
+  material->alpha_texname = NULL;
+  for (i = 0; i < 3; i++) { /* all colour triples default to 0 */
+    material->ambient[i] = 0.f;
+    material->diffuse[i] = 0.f;
+    material->specular[i] = 0.f;
+    material->transmittance[i] = 0.f;
+    material->emission[i] = 0.f;
+  }
+  material->illum = 0;
+  material->dissolve = 1.f; /* 1 == opaque */
+  material->shininess = 1.f;
+  material->ior = 1.f;
+}
+
+/* Implementation of string to int hashtable */
+
+#define HASH_TABLE_ERROR 1
+#define HASH_TABLE_SUCCESS 0
+
+#define HASH_TABLE_DEFAULT_SIZE 10
+
+typedef struct hash_table_entry_t
+{
+  unsigned long hash; /* hash stored in this slot; lookups compare hashes only */
+  int filled;         /* nonzero once the slot holds a value */
+  int pad0;
+  long value;         /* value associated with the hash */
+
+  struct hash_table_entry_t* next; /* links slots that were probed from the same home index */
+} hash_table_entry_t;
+
+typedef struct
+{
+  unsigned long* hashes;       /* inserted hashes in insertion order; only the first n are written */
+  hash_table_entry_t* entries; /* array of `capacity` slots */
+  size_t capacity;             /* number of slots in entries */
+  size_t n;                    /* number of hashes inserted so far */
+} hash_table_t;
+
+static unsigned long hash_djb2(const unsigned char* str)
+{
+  unsigned long h = 5381; /* initial hash value */
+  const unsigned char* p;
+
+  /* h = h * 33 + byte, for every byte up to the terminating NUL */
+  for (p = str; *p != 0; p++) {
+    h = h * 33UL + (unsigned long)(*p);
+  }
+  return h;
+}
+
+static void create_hash_table(size_t start_capacity, hash_table_t* hash_table)
+{
+  if (start_capacity < 1) /* a capacity of 0 falls back to the default size */
+    start_capacity = HASH_TABLE_DEFAULT_SIZE;
+  hash_table->hashes = (unsigned long*) TINYOBJ_MALLOC(start_capacity * sizeof(unsigned long)); /* left uninitialised */
+  hash_table->entries = (hash_table_entry_t*) TINYOBJ_CALLOC(start_capacity, sizeof(hash_table_entry_t)); /* zeroed, so every slot starts unfilled */
+  hash_table->capacity = start_capacity; /* NOTE(review): neither allocation above is checked for NULL */
+  hash_table->n = 0;
+}
+
+static void destroy_hash_table(hash_table_t* hash_table)
+{
+  TINYOBJ_FREE(hash_table->entries); /* frees both arrays; the hash_table_t itself is not freed */
+  TINYOBJ_FREE(hash_table->hashes);  /* the pointers are not reset afterwards */
+}
+
+/* Insert with quadratic probing. Returns HASH_TABLE_SUCCESS, or HASH_TABLE_ERROR when no free slot is found. */
+static int hash_table_insert_value(unsigned long hash, long value, hash_table_t* hash_table)
+{
+  /* Insert value */
+  size_t start_index = hash % hash_table->capacity; /* home slot of this hash */
+  size_t index = start_index;
+  hash_table_entry_t* start_entry = hash_table->entries + start_index;
+  size_t i;
+  hash_table_entry_t* entry;
+  /* Probe start_index + i*i (mod capacity) until an unfilled slot turns up. */
+  for (i = 1; hash_table->entries[index].filled; i++)
+  {
+    if (i >= hash_table->capacity)
+      return HASH_TABLE_ERROR; /* gave up after `capacity` probes */
+    index = (start_index + (i * i)) % hash_table->capacity;
+  }
+
+  entry = hash_table->entries + index;
+  entry->hash = hash;
+  entry->filled = 1;
+  entry->value = value;
+
+  if (index != start_index) {
+    /* This is a new entry, but not the start entry, hence we need to add a next pointer to our entry */
+    entry->next = start_entry->next; /* splice the new slot in right after the home slot */
+    start_entry->next = entry;
+  }
+
+  return HASH_TABLE_SUCCESS;
+}
+
+static int hash_table_insert(unsigned long hash, long value, hash_table_t* hash_table)
+{
+  int ret = hash_table_insert_value(hash, value, hash_table);
+  if (ret == HASH_TABLE_SUCCESS)
+  {
+    hash_table->hashes[hash_table->n] = hash; /* record the hash in insertion order */
+    hash_table->n++; /* no bounds check here: presumably callers keep n below capacity - confirm */
+  }
+  return ret; /* result of hash_table_insert_value, unchanged */
+}
+
+static hash_table_entry_t* hash_table_find(unsigned long hash, hash_table_t* hash_table)
+{
+  hash_table_entry_t* entry = hash_table->entries + (hash % hash_table->capacity); /* start at the home slot */
+  while (entry) /* then follow the `next` links */
+  {
+    if (entry->hash == hash && entry->filled)
+    {
+      return entry; /* match is decided by hash value alone */
+    }
+    entry = entry->next;
+  }
+  return NULL; /* no filled slot with this hash on the chain */
+}
+
+static void hash_table_maybe_grow(size_t new_n, hash_table_t* hash_table)
+{
+  size_t new_capacity;
+  hash_table_t new_hash_table;
+  size_t i;
+  /* Grow the table so it can hold new_n entries; does nothing when new_n already fits. */
+  if (new_n <= hash_table->capacity) {
+    return;
+  }
+  new_capacity = 2 * ((2 * hash_table->capacity) > new_n ? hash_table->capacity : new_n);
+  /* Create a new hash table. We're not calling create_hash_table because we want to realloc the hash array */
+  new_hash_table.hashes = hash_table->hashes = (unsigned long*) TINYOBJ_REALLOC_SIZED(
+      (void*) hash_table->hashes, sizeof(unsigned long) * hash_table->capacity, sizeof(unsigned long) * new_capacity);
+  new_hash_table.entries = (hash_table_entry_t*) TINYOBJ_CALLOC(new_capacity, sizeof(hash_table_entry_t));
+  new_hash_table.capacity = new_capacity;
+  new_hash_table.n = hash_table->n;
+
+  /* Rehash. Only the first n slots of hashes[] have ever been written, so iterate over n, not capacity. */
+  for (i = 0; i < hash_table->n; i++)
+  {
+    hash_table_entry_t* entry = hash_table_find(hash_table->hashes[i], hash_table);
+    if (entry) hash_table_insert_value(hash_table->hashes[i], entry->value, &new_hash_table); /* skip hashes with no entry */
+  }
+
+  TINYOBJ_FREE(hash_table->entries); /* the old slot array is replaced; hashes[] was reallocated in place */
+  (*hash_table) = new_hash_table;
+}
+
+static int hash_table_exists(const char* name, hash_table_t* hash_table)
+{
+  return hash_table_find(hash_djb2((const unsigned char*)name), hash_table) != NULL; /* 1 if an entry with name's hash exists, else 0 */
+}
+
+static void hash_table_set(const char* name, size_t val, hash_table_t* hash_table)
+{
+  /* Hash name */
+  unsigned long hash = hash_djb2((const unsigned char *)name);
+
+  hash_table_entry_t* entry = hash_table_find(hash, hash_table);
+  if (entry)
+  {
+    entry->value = (long)val; /* hash already present: overwrite the value in place */
+    return;
+  }
+
+  /* Expand if necessary, growing until the element has been added.
+   * NOTE(review): hash_table_maybe_grow returns early while n + 1 <= capacity, so a
+   * repeated insert failure would make no progress in this loop - confirm. */
+  do
+  {
+    hash_table_maybe_grow(hash_table->n + 1, hash_table);
+  }
+  while (hash_table_insert(hash, (long)val, hash_table) != HASH_TABLE_SUCCESS);
+}
+
+static long hash_table_get(const char* name, hash_table_t* hash_table)
+{
+  hash_table_entry_t* ret = hash_table_find(hash_djb2((const unsigned char*)(name)), hash_table);
+  return ret->value; /* no NULL check: presumably callers test hash_table_exists first - confirm */
+}
+
+static tinyobj_material_t *tinyobj_material_add(tinyobj_material_t *prev,
+                                                size_t num_materials,
+                                                tinyobj_material_t *new_mat) {
+  tinyobj_material_t *dst; /* the array after growing by one element */
+  size_t num_bytes = sizeof(tinyobj_material_t) * num_materials; /* current size of prev */
+  dst = (tinyobj_material_t *)TINYOBJ_REALLOC_SIZED(
+                                      prev, num_bytes, num_bytes + sizeof(tinyobj_material_t));
+  /* NOTE(review): dst is not checked for NULL before the store below */
+  dst[num_materials] = (*new_mat); /* Just copy pointer for char* members */
+  return dst; /* callers must use this pointer in place of prev */
+}
+
+static int is_line_ending(const char *p, size_t i, size_t end_i) {
+  const char ch = p[i];
+
+  /* NUL and '\n' always end a line; "\r\n" is reported at its '\n'. */
+  if (ch == '\0' || ch == '\n') return 1;
+
+  /* A '\r' counts only when another character follows inside the buffer
+   * and that character is not '\n' (the lone-'\r' convention). */
+  return (ch == '\r') && ((i + 1) < end_i) && (p[i + 1] != '\n');
+}
+
+typedef struct {
+  size_t pos; /* offset of the line's first character in the buffer */
+  size_t len; /* number of characters in the line, line ending excluded */
+} LineInfo;
+
+/* Split buf[0..buf_len) into lines: count them, allocate *line_infos, record each line's pos and len. */
+static int get_line_infos(const char *buf, size_t buf_len, LineInfo **line_infos, size_t *num_lines)
+{
+  size_t i = 0;
+  size_t end_idx = buf_len;
+  size_t prev_pos = 0;         /* start of the line currently being scanned */
+  size_t line_no = 0;
+  size_t last_line_ending = 0; /* index of the last line ending seen; stays 0 when there is none */
+
+  /* Count # of lines. *num_lines is only incremented, so the caller must pass it in as 0. */
+  for (i = 0; i < end_idx; i++) {
+    if (is_line_ending(buf, i, end_idx)) {
+      (*num_lines)++;
+      last_line_ending = i;
+    }
+  }
+  /* The last char from the input may not be a line
+    * ending character so add an extra line if there
+    * are more characters after the last line ending
+    * that was found. */
+  if (end_idx - last_line_ending > 0) {
+      (*num_lines)++;
+  }
+
+  if (*num_lines == 0) return TINYOBJ_ERROR_EMPTY;
+  /* NOTE(review): the allocation below is not checked for NULL. */
+  *line_infos = (LineInfo *)TINYOBJ_MALLOC(sizeof(LineInfo) * (*num_lines));
+
+  /* Fill line infos. */
+  for (i = 0; i < end_idx; i++) {
+    if (is_line_ending(buf, i, end_idx)) {
+      (*line_infos)[line_no].pos = prev_pos;
+      (*line_infos)[line_no].len = i - prev_pos;
+      prev_pos = i + 1;
+      line_no++;
+    }
+  }
+  if (end_idx - last_line_ending > 0) {
+    (*line_infos)[line_no].pos = prev_pos;
+    (*line_infos)[line_no].len = end_idx - prev_pos; /* prev_pos is 0 when no line ending exists, so the last char is kept */
+  }
+
+  return 0; /* success; the caller owns *line_infos */
+}
+
+static int tinyobj_parse_and_index_mtl_file(tinyobj_material_t **materials_out,
+                                            size_t *num_materials_out,
+                                            const char *mtl_filename, const char *obj_filename, file_reader_callback file_reader, void *ctx,
+                                            hash_table_t* material_table) {
+  tinyobj_material_t material;
+  size_t num_materials = 0;
+  tinyobj_material_t *materials = NULL;
+  int has_previous_material = 0;
+  const char *line_end = NULL;
+  size_t num_lines = 0;
+  LineInfo *line_infos = NULL;
+  size_t i = 0;
+  char *buf = NULL;
+  size_t len = 0;
+
+  if (materials_out == NULL) {
+    return TINYOBJ_ERROR_INVALID_PARAMETER;
+  }
+
+  if (num_materials_out == NULL) {
+    return TINYOBJ_ERROR_INVALID_PARAMETER;
+  }
+
+  (*materials_out) = NULL;
+  (*num_materials_out) = 0;
+
+  file_reader(ctx, mtl_filename, 1, obj_filename, &buf, &len);
+  if (len < 1) return TINYOBJ_ERROR_INVALID_PARAMETER;
+  if (buf == NULL) return TINYOBJ_ERROR_INVALID_PARAMETER;
+
+  if (get_line_infos(buf, len, &line_infos, &num_lines) != 0) {
+		TINYOBJ_FREE(line_infos);
+    return TINYOBJ_ERROR_EMPTY;
+  }
+
+  /* Create a default material */
+  initMaterial(&material);
+
+  for (i = 0; i < num_lines; i++) {
+    const char *p = &buf[line_infos[i].pos];
+    size_t p_len = line_infos[i].len;
+
+    char linebuf[4096];
+    const char *token;
+    assert(p_len < 4095);
+
+    memcpy(linebuf, p, p_len);
+    linebuf[p_len] = '\0';
+
+    token = linebuf;
+    line_end = token + p_len;
+
+    /* Skip leading space. */
+    token += strspn(token, " \t");
+
+    assert(token);
+    if (token[0] == '\0') continue; /* empty line */
+
+    if (token[0] == '#') continue; /* comment line */
+
+    /* new mtl */
+    if ((0 == strncmp(token, "newmtl", 6)) && IS_SPACE((token[6]))) {
+      char namebuf[4096];
+
+      /* flush previous material. */
+      if (has_previous_material) {
+        materials = tinyobj_material_add(materials, num_materials, &material);
+        num_materials++;
+      } else {
+        has_previous_material = 1;
+      }
+
+      /* initial temporary material */
+      initMaterial(&material);
+
+      /* set new mtl name */
+      token += 7;
+#ifdef _MSC_VER
+      sscanf_s(token, "%s", namebuf, (unsigned)_countof(namebuf));
+#else
+      sscanf(token, "%s", namebuf);
+#endif
+      material.name = my_strdup(namebuf, (size_t) (line_end - token));
+
+      /* Add material to material table */
+      if (material_table)
+        hash_table_set(material.name, num_materials, material_table);
+
+      continue;
+    }
+
+    /* ambient */
+    if (token[0] == 'K' && token[1] == 'a' && IS_SPACE((token[2]))) {
+      float r, g, b;
+      token += 2;
+      parseFloat3(&r, &g, &b, &token);
+      material.ambient[0] = r;
+      material.ambient[1] = g;
+      material.ambient[2] = b;
+      continue;
+    }
+
+    /* diffuse */
+    if (token[0] == 'K' && token[1] == 'd' && IS_SPACE((token[2]))) {
+      float r, g, b;
+      token += 2;
+      parseFloat3(&r, &g, &b, &token);
+      material.diffuse[0] = r;
+      material.diffuse[1] = g;
+      material.diffuse[2] = b;
+      continue;
+    }
+
+    /* specular */
+    if (token[0] == 'K' && token[1] == 's' && IS_SPACE((token[2]))) {
+      float r, g, b;
+      token += 2;
+      parseFloat3(&r, &g, &b, &token);
+      material.specular[0] = r;
+      material.specular[1] = g;
+      material.specular[2] = b;
+      continue;
+    }
+
+    /* transmittance */
+    if (token[0] == 'K' && token[1] == 't' && IS_SPACE((token[2]))) {
+      float r, g, b;
+      token += 2;
+      parseFloat3(&r, &g, &b, &token);
+      material.transmittance[0] = r;
+      material.transmittance[1] = g;
+      material.transmittance[2] = b;
+      continue;
+    }
+
+    /* ior(index of refraction) */
+    if (token[0] == 'N' && token[1] == 'i' && IS_SPACE((token[2]))) {
+      token += 2;
+      material.ior = parseFloat(&token);
+      continue;
+    }
+
+    /* emission */
+    if (token[0] == 'K' && token[1] == 'e' && IS_SPACE(token[2])) {
+      float r, g, b;
+      token += 2;
+      parseFloat3(&r, &g, &b, &token);
+      material.emission[0] = r;
+      material.emission[1] = g;
+      material.emission[2] = b;
+      continue;
+    }
+
+    /* shininess */
+    if (token[0] == 'N' && token[1] == 's' && IS_SPACE(token[2])) {
+      token += 2;
+      material.shininess = parseFloat(&token);
+      continue;
+    }
+
+    /* illum model */
+    if (0 == strncmp(token, "illum", 5) && IS_SPACE(token[5])) {
+      token += 6;
+      material.illum = parseInt(&token);
+      continue;
+    }
+
+    /* dissolve */
+    if ((token[0] == 'd' && IS_SPACE(token[1]))) {
+      token += 1;
+      material.dissolve = parseFloat(&token);
+      continue;
+    }
+    if (token[0] == 'T' && token[1] == 'r' && IS_SPACE(token[2])) {
+      token += 2;
+      /* Invert value of Tr(assume Tr is in range [0, 1]) */
+      material.dissolve = 1.0f - parseFloat(&token);
+      continue;
+    }
+
+    /* ambient texture */
+    if ((0 == strncmp(token, "map_Ka", 6)) && IS_SPACE(token[6])) {
+      token += 7;
+      material.ambient_texname = my_strdup(token, (size_t) (line_end - token));
+      continue;
+    }
+
+    /* diffuse texture */
+    if ((0 == strncmp(token, "map_Kd", 6)) && IS_SPACE(token[6])) {
+      token += 7;
+      material.diffuse_texname = my_strdup(token, (size_t) (line_end - token));
+      continue;
+    }
+
+    /* specular texture */
+    if ((0 == strncmp(token, "map_Ks", 6)) && IS_SPACE(token[6])) {
+      token += 7;
+      material.specular_texname = my_strdup(token, (size_t) (line_end - token));
+      continue;
+    }
+
+    /* specular highlight texture */
+    if ((0 == strncmp(token, "map_Ns", 6)) && IS_SPACE(token[6])) {
+      token += 7;
+      material.specular_highlight_texname = my_strdup(token, (size_t) (line_end - token));
+      continue;
+    }
+
+    /* bump texture */
+    if ((0 == strncmp(token, "map_bump", 8)) && IS_SPACE(token[8])) {
+      token += 9;
+      material.bump_texname = my_strdup(token, (size_t) (line_end - token));
+      continue;
+    }
+
+    /* alpha texture */
+    if ((0 == strncmp(token, "map_d", 5)) && IS_SPACE(token[5])) {
+      token += 6;
+      material.alpha_texname = my_strdup(token, (size_t) (line_end - token));
+      continue;
+    }
+
+    /* bump texture */
+    if ((0 == strncmp(token, "bump", 4)) && IS_SPACE(token[4])) {
+      token += 5;
+      material.bump_texname = my_strdup(token, (size_t) (line_end - token));
+      continue;
+    }
+
+    /* displacement texture */
+    if ((0 == strncmp(token, "disp", 4)) && IS_SPACE(token[4])) {
+      token += 5;
+      material.displacement_texname = my_strdup(token, (size_t) (line_end - token));
+      continue;
+    }
+
+    /* @todo { unknown parameter } */
+  }
+
+	TINYOBJ_FREE(line_infos);
+
+  if (material.name) {
+    /* Flush last material element */
+    materials = tinyobj_material_add(materials, num_materials, &material);
+    num_materials++;
+  }
+
+  (*num_materials_out) = num_materials;
+  (*materials_out) = materials;
+
+  return TINYOBJ_SUCCESS;
+}
+
+int tinyobj_parse_mtl_file(tinyobj_material_t **materials_out, size_t *num_materials_out,
+                           const char *mtl_filename, const char *obj_filename,
+                           file_reader_callback file_reader, void *ctx) {
+  /* Same parse as the indexing variant, with NULL passed for its final (table) argument. */
+  return tinyobj_parse_and_index_mtl_file(materials_out, num_materials_out, mtl_filename, obj_filename, file_reader, ctx, NULL);
+}
+
+
+typedef enum { /* kind of .obj directive parseLine() found on one line */
+  COMMAND_EMPTY, /* blank line, '#' comment, or unrecognised directive */
+  COMMAND_V, /* "v" line */
+  COMMAND_VN, /* "vn" line */
+  COMMAND_VT, /* "vt" line */
+  COMMAND_F, /* "f" line */
+  COMMAND_G, /* "g" line */
+  COMMAND_O, /* "o" line */
+  COMMAND_USEMTL, /* "usemtl" line */
+  COMMAND_MTLLIB /* "mtllib" line */
+
+} CommandType;
+
+typedef struct { /* one parsed .obj line; parseLine() fills only the fields that match `type` */
+  float vx, vy, vz; /* COMMAND_V: the three parsed floats */
+  float nx, ny, nz; /* COMMAND_VN: the three parsed floats */
+  float tx, ty; /* COMMAND_VT: the two parsed floats */
+
+  /* @todo { Use dynamic array } */
+  tinyobj_vertex_index_t f[TINYOBJ_MAX_FACES_PER_F_LINE]; /* COMMAND_F: vertex index entries */
+  size_t num_f; /* number of entries used in f[] */
+
+  int f_num_verts[TINYOBJ_MAX_FACES_PER_F_LINE]; /* vertex count of each face produced by the line */
+  size_t num_f_num_verts; /* number of entries used in f_num_verts[] */
+
+  const char *group_name; /* COMMAND_G: points into the buffer given to parseLine() */
+  unsigned int group_name_len;
+  int pad0; /* presumably explicit padding; not referenced by the parser code shown here */
+
+  const char *object_name; /* COMMAND_O: points into the buffer given to parseLine() */
+  unsigned int object_name_len;
+  int pad1; /* presumably explicit padding */
+
+  const char *material_name; /* COMMAND_USEMTL: points into the buffer given to parseLine() */
+  unsigned int material_name_len;
+  int pad2; /* presumably explicit padding */
+
+  const char *mtllib_name; /* COMMAND_MTLLIB: points into the buffer given to parseLine() */
+  unsigned int mtllib_name_len;
+
+  CommandType type; /* which directive this line held; COMMAND_EMPTY when none */
+} Command;
+
+/* Parse one .obj line `p` of `p_len` bytes into `command`. Returns 1 when a
+ * directive was recognised, else 0 with command->type == COMMAND_EMPTY.
+ * Names stored in `command` point into `p` and are paired with a length. */
+static int parseLine(Command *command, const char *p, size_t p_len,
+                     int triangulate) {
+  char linebuf[4096];
+  const char *token;
+
+  /* Reject (as an empty command) any line that would overflow linebuf. */
+  command->type = COMMAND_EMPTY;
+  if (p_len >= sizeof(linebuf)) return 0;
+
+  memcpy(linebuf, p, p_len);
+  linebuf[p_len] = '\0';
+  token = linebuf;
+
+  /* Skip leading space. */
+  skip_space(&token);
+
+  assert(token);
+  if (token[0] == '\0') { /* empty line */
+    return 0;
+  }
+
+  if (token[0] == '#') { /* comment line */
+    return 0;
+  }
+
+  /* vertex */
+  if (token[0] == 'v' && IS_SPACE((token[1]))) {
+    float x, y, z;
+    token += 2;
+    parseFloat3(&x, &y, &z, &token);
+    command->vx = x;
+    command->vy = y;
+    command->vz = z;
+    command->type = COMMAND_V;
+    return 1;
+  }
+
+  /* normal */
+  if (token[0] == 'v' && token[1] == 'n' && IS_SPACE((token[2]))) {
+    float x, y, z;
+    token += 3;
+    parseFloat3(&x, &y, &z, &token);
+    command->nx = x;
+    command->ny = y;
+    command->nz = z;
+    command->type = COMMAND_VN;
+    return 1;
+  }
+
+  /* texcoord */
+  if (token[0] == 'v' && token[1] == 't' && IS_SPACE((token[2]))) {
+    float x, y;
+    token += 3;
+    parseFloat2(&x, &y, &token);
+    command->tx = x;
+    command->ty = y;
+    command->type = COMMAND_VT;
+    return 1;
+  }
+
+  /* face */
+  if (token[0] == 'f' && IS_SPACE((token[1]))) {
+    size_t num_f = 0;
+
+    tinyobj_vertex_index_t f[TINYOBJ_MAX_FACES_PER_F_LINE];
+    token += 2;
+    skip_space(&token);
+
+    while (!IS_NEW_LINE(token[0])) {
+      tinyobj_vertex_index_t vi;
+
+      /* More vertices than f[] can hold: drop the whole line rather than
+       * write past the end of the stack buffer. */
+      if (num_f >= TINYOBJ_MAX_FACES_PER_F_LINE) return 0;
+      vi = parseRawTriple(&token);
+      skip_space_and_cr(&token);
+
+      f[num_f] = vi;
+      num_f++;
+    }
+
+    if (triangulate) {
+      size_t k;
+      size_t n = 0;
+
+      /* The fan below stores 3 * (num_f - 2) indices in command->f; refuse
+       * polygons whose triangulation would not fit. */
+      if (num_f > 2 && 3 * (num_f - 2) > TINYOBJ_MAX_FACES_PER_F_LINE) return 0;
+
+      /* Triangle fan around the first vertex: (f[0], f[k - 1], f[k]). */
+      for (k = 2; k < num_f; k++) {
+        command->f[3 * n + 0] = f[0];
+        command->f[3 * n + 1] = f[k - 1];
+        command->f[3 * n + 2] = f[k];
+
+        command->f_num_verts[n] = 3;
+        n++;
+      }
+      command->num_f = 3 * n;
+      command->num_f_num_verts = n;
+
+    } else {
+      size_t k = 0;
+      for (k = 0; k < num_f; k++) {
+        command->f[k] = f[k];
+      }
+
+      command->num_f = num_f;
+      command->f_num_verts[0] = (int)num_f;
+      command->num_f_num_verts = 1;
+    }
+
+    command->type = COMMAND_F;
+    return 1;
+  }
+
+  /* use mtl */
+  if ((0 == strncmp(token, "usemtl", 6)) && IS_SPACE((token[6]))) {
+    token += 7;
+
+    skip_space(&token);
+    command->material_name = p + (token - linebuf);
+    command->material_name_len = (unsigned int)length_until_newline(
+        token, (p_len - (size_t)(token - linebuf)) + 1);
+    command->type = COMMAND_USEMTL;
+
+    return 1;
+  }
+
+  /* load mtl */
+  if ((0 == strncmp(token, "mtllib", 6)) && IS_SPACE((token[6]))) {
+    /* By specification, `mtllib` should appear only once in .obj */
+    token += 7;
+
+    skip_space(&token);
+    command->mtllib_name = p + (token - linebuf);
+    command->mtllib_name_len = (unsigned int)length_until_newline(
+        token, p_len - (size_t)(token - linebuf)) + 1;
+    command->type = COMMAND_MTLLIB;
+
+    return 1;
+  }
+
+  /* group name */
+  if (token[0] == 'g' && IS_SPACE((token[1]))) {
+    /* @todo { multiple group name. } */
+    token += 2;
+
+    command->group_name = p + (token - linebuf);
+    command->group_name_len = (unsigned int)length_until_newline(
+        token, p_len - (size_t)(token - linebuf)) + 1;
+    command->type = COMMAND_G;
+
+    return 1;
+  }
+
+  /* object name */
+  if (token[0] == 'o' && IS_SPACE((token[1]))) {
+    /* @todo { multiple object name? } */
+    token += 2;
+
+    command->object_name = p + (token - linebuf);
+    command->object_name_len = (unsigned int)length_until_newline(
+        token, p_len - (size_t)(token - linebuf)) + 1;
+    command->type = COMMAND_O;
+
+    return 1;
+  }
+
+  return 0;
+}
+
+static size_t basename_len(const char *filename, size_t filename_length) {
+  /* Length of the last path component of `filename`, counting the NUL
+   * terminator. `filename_length` is expected to count the terminator too
+   * (strlen + 1). If the name contains no directory delimiter, the whole
+   * `filename_length` is returned.
+   *
+   * On Windows, the directory delimiter is '\' and both it and '/' are
+   * reserved by the filesystem. On *nix platforms, only the '/' character
+   * is reserved, so account for the two cases separately. */
+  size_t tail; /* bytes after the candidate delimiter, NUL included */
+
+  /* Scan backwards from the character in front of the terminator. Every
+   * index is derived from `tail < filename_length`, so the scan never reads
+   * in front of the buffer, even for an empty name or for a name that
+   * contains no delimiter at all. */
+  for (tail = 1; tail < filename_length; tail++) {
+    const char c = filename[filename_length - 1 - tail];
+
+#if defined(_WIN32)
+    if (c == '/' || c == '\\') {
+#else
+    if (c == '/') {
+#endif
+      return tail;
+    }
+  }
+
+  /* No delimiter: the whole name is the basename. */
+  return filename_length;
+}
+
+static char *generate_mtl_filename(const char *obj_filename,
+                                   size_t obj_filename_length,
+                                   const char *mtllib_name,
+                                   size_t mtllib_name_length) {
+  /* Create a dynamically-allocated material filename. This allows the material
+   * and obj files to be separated, however the mtllib name in the OBJ file
+   * must be a relative path to the material file from the OBJ's directory.
+   * This does not support the mtllib name as an absolute address.
+   * Both lengths count the NUL terminator. Returns NULL when the allocation
+   * fails or mtllib_name_length is 0; otherwise the caller frees the result. */
+  char *mtl_filename;
+  size_t dir_length; /* obj path up to and including its last delimiter */
+
+  if (mtllib_name_length == 0) return NULL;
+
+  /* Calculate required size of mtl_filename and allocate */
+  dir_length = obj_filename_length - basename_len(obj_filename, obj_filename_length);
+  mtl_filename = (char *)TINYOBJ_MALLOC(dir_length + mtllib_name_length);
+  if (mtl_filename == NULL) return NULL;
+
+  /* Copy the obj's directory, then append the (length-bounded) mtllib name. */
+  memcpy(mtl_filename, obj_filename, dir_length);
+  strncpy(mtl_filename + dir_length, mtllib_name, mtllib_name_length);
+  mtl_filename[dir_length + mtllib_name_length - 1] = '\0';
+  return mtl_filename;
+}
+
+int tinyobj_parse_obj(tinyobj_attrib_t *attrib, tinyobj_shape_t **shapes,
+                      size_t *num_shapes, tinyobj_material_t **materials_out,
+                      size_t *num_materials_out, const char *obj_filename,
+                      file_reader_callback file_reader, void *ctx,
+                      unsigned int flags) { /* returns TINYOBJ_SUCCESS or a TINYOBJ_ERROR_* code */
+  LineInfo *line_infos = NULL;
+  Command *commands = NULL;
+  size_t num_lines = 0;
+  /* Steps: read file, split into lines, parseLine() each, load the MTL, fill attrib, build shapes. */
+  size_t num_v = 0;
+  size_t num_vn = 0;
+  size_t num_vt = 0;
+  size_t num_f = 0;
+  size_t num_faces = 0;
+
+  int mtllib_line_index = -1; /* index of the last mtllib line seen, -1 if none */
+
+  tinyobj_material_t *materials = NULL;
+  size_t num_materials = 0;
+
+  hash_table_t material_table; /* queried by material name to obtain a material id (see usemtl below) */
+
+  char *buf = NULL;
+  size_t len = 0;
+  file_reader(ctx, obj_filename, /* is_mtl */0, obj_filename, &buf, &len);
+  /* NOTE(review): file_reader runs before the arguments below are validated. */
+  if (len < 1) return TINYOBJ_ERROR_INVALID_PARAMETER;
+  if (attrib == NULL) return TINYOBJ_ERROR_INVALID_PARAMETER;
+  if (shapes == NULL) return TINYOBJ_ERROR_INVALID_PARAMETER;
+  if (num_shapes == NULL) return TINYOBJ_ERROR_INVALID_PARAMETER;
+  if (buf == NULL) return TINYOBJ_ERROR_INVALID_PARAMETER;
+  if (materials_out == NULL) return TINYOBJ_ERROR_INVALID_PARAMETER;
+  if (num_materials_out == NULL) return TINYOBJ_ERROR_INVALID_PARAMETER;
+
+  tinyobj_attrib_init(attrib);
+
+  /* 1. create line data */
+  if (get_line_infos(buf, len, &line_infos, &num_lines) != 0) {
+    return TINYOBJ_ERROR_EMPTY;
+  }
+
+  commands = (Command *)TINYOBJ_MALLOC(sizeof(Command) * num_lines);
+  /* NOTE(review): the result of this allocation is not checked before use. */
+  create_hash_table(HASH_TABLE_DEFAULT_SIZE, &material_table);
+
+  /* 2. parse each line */
+  {
+    size_t i = 0;
+    for (i = 0; i < num_lines; i++) {
+      int ret = parseLine(&commands[i], &buf[line_infos[i].pos],
+                          line_infos[i].len, flags & TINYOBJ_FLAG_TRIANGULATE);
+      if (ret) {
+        if (commands[i].type == COMMAND_V) {
+          num_v++;
+        } else if (commands[i].type == COMMAND_VN) {
+          num_vn++;
+        } else if (commands[i].type == COMMAND_VT) {
+          num_vt++;
+        } else if (commands[i].type == COMMAND_F) {
+          num_f += commands[i].num_f;
+          num_faces += commands[i].num_f_num_verts;
+        }
+
+        if (commands[i].type == COMMAND_MTLLIB) {
+          mtllib_line_index = (int)i;
+        }
+      }
+    }
+  }
+
+  /* line_infos are not used anymore. Release memory. */
+  if (line_infos) {
+    TINYOBJ_FREE(line_infos);
+  }
+
+  /* Load material (if it exists) */
+  if (mtllib_line_index >= 0 && commands[mtllib_line_index].mtllib_name &&
+      commands[mtllib_line_index].mtllib_name_len > 0) {
+    /* Maximum length allowed by Linux - higher than Windows and macOS */
+    size_t obj_filename_len = my_strnlen(obj_filename, 4096 + 255) + 1;
+    char *mtl_filename;
+    char *mtllib_name;
+    size_t mtllib_name_len = 0;
+    int ret;
+
+    mtllib_name_len = length_until_line_feed(commands[mtllib_line_index].mtllib_name,
+                                             commands[mtllib_line_index].mtllib_name_len);
+
+    mtllib_name = my_strndup(commands[mtllib_line_index].mtllib_name,
+                             mtllib_name_len);
+
+    /* allow for NUL terminator */
+    mtllib_name_len++;
+    mtl_filename = generate_mtl_filename(obj_filename, obj_filename_len,
+                                         mtllib_name, mtllib_name_len);
+
+    ret = tinyobj_parse_and_index_mtl_file(&materials, &num_materials,
+                                           mtl_filename, obj_filename,
+                                           file_reader, ctx,
+                                           &material_table);
+
+    if (ret != TINYOBJ_SUCCESS) {
+      /* warning. */
+      fprintf(stderr, "TINYOBJ: Failed to parse material file '%s': %d\n", mtl_filename, ret);
+    }
+    TINYOBJ_FREE(mtl_filename);
+    TINYOBJ_FREE(mtllib_name);
+  }
+
+  /* Construct attributes */
+  /* Second pass over the commands: copy v/vn/vt values and pass face indices through fixIndex(). */
+  {
+    size_t v_count = 0;
+    size_t n_count = 0;
+    size_t t_count = 0;
+    size_t f_count = 0;
+    size_t face_count = 0;
+    int material_id = -1; /* -1 = default unknown material. */
+    size_t i = 0;
+
+    attrib->vertices = (float *)TINYOBJ_MALLOC(sizeof(float) * num_v * 3);
+    attrib->num_vertices = (unsigned int)num_v;
+    attrib->normals = (float *)TINYOBJ_MALLOC(sizeof(float) * num_vn * 3);
+    attrib->num_normals = (unsigned int)num_vn;
+    attrib->texcoords = (float *)TINYOBJ_MALLOC(sizeof(float) * num_vt * 2);
+    attrib->num_texcoords = (unsigned int)num_vt;
+    attrib->faces = (tinyobj_vertex_index_t *)TINYOBJ_MALLOC(
+                                                     sizeof(tinyobj_vertex_index_t) * num_f);
+    attrib->num_faces = (unsigned int)num_f;
+    attrib->face_num_verts = (int *)TINYOBJ_MALLOC(sizeof(int) * num_faces);
+    attrib->material_ids = (int *)TINYOBJ_MALLOC(sizeof(int) * num_faces);
+    attrib->num_face_num_verts = (unsigned int)num_faces;
+
+    for (i = 0; i < num_lines; i++) {
+      if (commands[i].type == COMMAND_EMPTY) {
+        continue;
+      } else if (commands[i].type == COMMAND_USEMTL) {
+        /* @todo
+           if (commands[t][i].material_name &&
+           commands[t][i].material_name_len > 0) {
+           std::string material_name(commands[t][i].material_name,
+           commands[t][i].material_name_len);
+
+           if (material_map.find(material_name) != material_map.end()) {
+           material_id = material_map[material_name];
+           } else {
+        // Assign invalid material ID
+        material_id = -1;
+        }
+        }
+        */
+        if (commands[i].material_name &&
+           commands[i].material_name_len >0)
+        {
+          /* Create a null terminated string */
+          char* material_name_null_term = (char*) TINYOBJ_MALLOC(commands[i].material_name_len + 1);
+          memcpy((void*) material_name_null_term, (const void*) commands[i].material_name, commands[i].material_name_len);
+          material_name_null_term[commands[i].material_name_len] = 0;
+
+          if (hash_table_exists(material_name_null_term, &material_table))
+            material_id = (int)hash_table_get(material_name_null_term, &material_table);
+          else
+            material_id = -1;
+
+          TINYOBJ_FREE(material_name_null_term);
+        }
+      } else if (commands[i].type == COMMAND_V) {
+        attrib->vertices[3 * v_count + 0] = commands[i].vx;
+        attrib->vertices[3 * v_count + 1] = commands[i].vy;
+        attrib->vertices[3 * v_count + 2] = commands[i].vz;
+        v_count++;
+      } else if (commands[i].type == COMMAND_VN) {
+        attrib->normals[3 * n_count + 0] = commands[i].nx;
+        attrib->normals[3 * n_count + 1] = commands[i].ny;
+        attrib->normals[3 * n_count + 2] = commands[i].nz;
+        n_count++;
+      } else if (commands[i].type == COMMAND_VT) {
+        attrib->texcoords[2 * t_count + 0] = commands[i].tx;
+        attrib->texcoords[2 * t_count + 1] = commands[i].ty;
+        t_count++;
+      } else if (commands[i].type == COMMAND_F) {
+        size_t k = 0;
+        for (k = 0; k < commands[i].num_f; k++) {
+          tinyobj_vertex_index_t vi = commands[i].f[k];
+          int v_idx = fixIndex(vi.v_idx, v_count);
+          int vn_idx = fixIndex(vi.vn_idx, n_count);
+          int vt_idx = fixIndex(vi.vt_idx, t_count);
+          attrib->faces[f_count + k].v_idx = v_idx;
+          attrib->faces[f_count + k].vn_idx = vn_idx;
+          attrib->faces[f_count + k].vt_idx = vt_idx;
+        }
+
+        for (k = 0; k < commands[i].num_f_num_verts; k++) {
+          attrib->material_ids[face_count + k] = material_id;
+          attrib->face_num_verts[face_count + k] = commands[i].f_num_verts[k];
+        }
+
+        f_count += commands[i].num_f;
+        face_count += commands[i].num_f_num_verts;
+      }
+    }
+  }
+
+  /* 5. Construct shape information. */
+  {
+    unsigned int face_count = 0;
+    size_t i = 0;
+    size_t n = 0;
+    size_t shape_idx = 0;
+
+    const char *shape_name = NULL;
+    unsigned int shape_name_len = 0;
+    const char *prev_shape_name = NULL;
+    unsigned int prev_shape_name_len = 0;
+    unsigned int prev_shape_face_offset = 0;
+    unsigned int prev_face_offset = 0;
+    tinyobj_shape_t prev_shape = {NULL, 0, 0}; /* never modified; only its face_offset is read below */
+
+    /* Find the number of shapes in .obj */
+    for (i = 0; i < num_lines; i++) {
+      if (commands[i].type == COMMAND_O || commands[i].type == COMMAND_G) {
+        n++;
+      }
+    }
+
+    /* Allocate array of shapes with maximum possible size(+1 for unnamed
+     * group/object).
+     * Actual # of shapes found in .obj is determined in the later */
+    (*shapes) = (tinyobj_shape_t*)TINYOBJ_MALLOC(sizeof(tinyobj_shape_t) * (n + 1));
+
+    for (i = 0; i < num_lines; i++) {
+      if (commands[i].type == COMMAND_O || commands[i].type == COMMAND_G) {
+        if (commands[i].type == COMMAND_O) {
+          shape_name = commands[i].object_name;
+          shape_name_len = commands[i].object_name_len;
+        } else {
+          shape_name = commands[i].group_name;
+          shape_name_len = commands[i].group_name_len;
+        }
+
+        if (face_count == 0) {
+          /* 'o' or 'g' appears before any 'f' */
+          prev_shape_name = shape_name;
+          prev_shape_name_len = shape_name_len;
+          prev_shape_face_offset = face_count;
+          prev_face_offset = face_count;
+        } else {
+          if (shape_idx == 0) {
+            /* 'o' or 'g' after some 'v' lines. */
+            (*shapes)[shape_idx].name = my_strndup(
+                                                   prev_shape_name, prev_shape_name_len); /* may be NULL */
+            (*shapes)[shape_idx].face_offset = prev_shape.face_offset;
+            (*shapes)[shape_idx].length = face_count - prev_face_offset;
+            shape_idx++;
+
+            prev_face_offset = face_count;
+
+          } else {
+            if ((face_count - prev_face_offset) > 0) {
+              (*shapes)[shape_idx].name =
+                my_strndup(prev_shape_name, prev_shape_name_len);
+              (*shapes)[shape_idx].face_offset = prev_face_offset;
+              (*shapes)[shape_idx].length = face_count - prev_face_offset;
+              shape_idx++;
+              prev_face_offset = face_count;
+            }
+          }
+
+          /* Record shape info for succeeding 'o' or 'g' command. */
+          prev_shape_name = shape_name;
+          prev_shape_name_len = shape_name_len;
+          prev_shape_face_offset = face_count;
+        }
+      }
+      if (commands[i].type == COMMAND_F) { /* counts 'f' lines: one per command, however many faces it produced */
+        face_count++;
+      }
+    }
+
+    if ((face_count - prev_face_offset) > 0) {
+      size_t length = face_count - prev_shape_face_offset;
+      if (length > 0) {
+        (*shapes)[shape_idx].name =
+          my_strndup(prev_shape_name, prev_shape_name_len);
+        (*shapes)[shape_idx].face_offset = prev_face_offset;
+        (*shapes)[shape_idx].length = face_count - prev_face_offset;
+        shape_idx++;
+      }
+    } else {
+      /* Guess no 'v' line occurrence after 'o' or 'g', so discards current
+       * shape information. */
+    }
+
+    (*num_shapes) = shape_idx;
+  }
+
+  if (commands) {
+    TINYOBJ_FREE(commands);
+  }
+
+  destroy_hash_table(&material_table);
+
+  (*materials_out) = materials;
+  (*num_materials_out) = num_materials;
+
+  return TINYOBJ_SUCCESS;
+}
+
+void tinyobj_attrib_init(tinyobj_attrib_t *attrib) {
+  /* No buffers attached yet... */
+  attrib->vertices = NULL;
+  attrib->normals = NULL;
+  attrib->texcoords = NULL;
+  attrib->faces = NULL;
+  attrib->face_num_verts = NULL;
+  attrib->material_ids = NULL;
+  /* ...and every element count at zero. */
+  attrib->num_vertices = attrib->num_normals = attrib->num_texcoords = 0;
+  attrib->num_faces = 0;
+  attrib->num_face_num_verts = 0;
+}
+
+void tinyobj_attrib_free(tinyobj_attrib_t *attrib) { /* frees each buffer and NULLs it, so a repeated call cannot double-free */
+  if (attrib->vertices) { TINYOBJ_FREE(attrib->vertices); attrib->vertices = NULL; }
+  if (attrib->normals) { TINYOBJ_FREE(attrib->normals); attrib->normals = NULL; }
+  if (attrib->texcoords) { TINYOBJ_FREE(attrib->texcoords); attrib->texcoords = NULL; }
+  if (attrib->faces) { TINYOBJ_FREE(attrib->faces); attrib->faces = NULL; }
+  if (attrib->face_num_verts) { TINYOBJ_FREE(attrib->face_num_verts); attrib->face_num_verts = NULL; }
+  if (attrib->material_ids) { TINYOBJ_FREE(attrib->material_ids); attrib->material_ids = NULL; }
+}
+
+void tinyobj_shapes_free(tinyobj_shape_t *shapes, size_t num_shapes) {
+  tinyobj_shape_t *shape;
+  if (!shapes) return;
+
+  /* Release each shape's name (when it has one), then the array itself. */
+  for (shape = shapes; shape != shapes + num_shapes; ++shape) {
+    if (shape->name != NULL) TINYOBJ_FREE(shape->name);
+  }
+  TINYOBJ_FREE(shapes);
+}
+
+void tinyobj_materials_free(tinyobj_material_t *materials,
+                            size_t num_materials) {
+  tinyobj_material_t *mat;
+  if (!materials) return;
+
+  /* Free whichever name / texture-name strings each material has set. */
+  for (mat = materials; mat != materials + num_materials; ++mat) {
+    if (mat->name != NULL) TINYOBJ_FREE(mat->name);
+    if (mat->ambient_texname != NULL) TINYOBJ_FREE(mat->ambient_texname);
+    if (mat->diffuse_texname != NULL) TINYOBJ_FREE(mat->diffuse_texname);
+    if (mat->specular_texname != NULL) TINYOBJ_FREE(mat->specular_texname);
+    if (mat->specular_highlight_texname != NULL) {
+      TINYOBJ_FREE(mat->specular_highlight_texname);
+    }
+    if (mat->bump_texname != NULL) TINYOBJ_FREE(mat->bump_texname);
+    if (mat->displacement_texname != NULL) TINYOBJ_FREE(mat->displacement_texname);
+    if (mat->alpha_texname != NULL) TINYOBJ_FREE(mat->alpha_texname);
+  }
+  TINYOBJ_FREE(materials);
+}
+#endif /* TINYOBJ_LOADER_C_IMPLEMENTATION */
+
+#endif /* TINOBJ_LOADER_C_H_ */
diff --git a/lib/zmath/README.md b/lib/zmath/README.md
new file mode 100644
index 0000000..c11ef1f
--- /dev/null
+++ b/lib/zmath/README.md
@@ -0,0 +1,138 @@
+# zmath v0.9.6 - SIMD math library for game developers
+
+Tested on x86_64 and AArch64.
+
+Provides ~140 optimized routines and ~70 extensive tests.
+
+Can be used with any graphics API.
+
+Documentation can be found [here](https://github.com/michal-z/zig-gamedev/blob/main/libs/zmath/src/zmath.zig).
+
+Benchmarks can be found [here](https://github.com/michal-z/zig-gamedev/blob/main/libs/zmath/src/benchmark.zig).
+
+An intro article can be found [here](https://zig.news/michalz/fast-multi-platform-simd-math-library-in-zig-2adn).
+
+## Getting started
+
+Copy `zmath` folder to a `libs` subdirectory of the root of your project.
+
+Then in your `build.zig` add:
+
+```zig
+const std = @import("std");
+const zmath = @import("libs/zmath/build.zig");
+
+pub fn build(b: *std.Build) void {
+    ...
+    const optimize = b.standardOptimizeOption(.{});
+    const target = b.standardTargetOptions(.{});
+
+    const zmath_pkg = zmath.package(b, target, optimize, .{
+        .options = .{ .enable_cross_platform_determinism = true },
+    });
+
+    zmath_pkg.link(exe);
+}
+```
+
+Now in your code you may import and use zmath:
+
+```zig
+const zm = @import("zmath");
+
+pub fn main() !void {
+    //
+    // OpenGL/Vulkan example
+    //
+    const object_to_world = zm.rotationY(..);
+    const world_to_view = zm.lookAtRh(
+        zm.f32x4(3.0, 3.0, 3.0, 1.0), // eye position
+        zm.f32x4(0.0, 0.0, 0.0, 1.0), // focus point
+        zm.f32x4(0.0, 1.0, 0.0, 0.0), // up direction ('w' coord is zero because this is a vector not a point)
+    );
+    // `perspectiveFovRhGl` produces Z values in [-1.0, 1.0] range (Vulkan app should use `perspectiveFovRh`)
+    const view_to_clip = zm.perspectiveFovRhGl(0.25 * math.pi, aspect_ratio, 0.1, 20.0);
+
+    const object_to_view = zm.mul(object_to_world, world_to_view);
+    const object_to_clip = zm.mul(object_to_view, view_to_clip);
+
+    // Transposition is needed because GLSL uses column-major matrices by default
+    gl.uniformMatrix4fv(0, 1, gl.TRUE, zm.arrNPtr(&object_to_clip));
+    
+    // In GLSL: gl_Position = vec4(in_position, 1.0) * object_to_clip;
+    
+    //
+    // DirectX example
+    //
+    const object_to_world = zm.rotationY(..);
+    const world_to_view = zm.lookAtLh(
+        zm.f32x4(3.0, 3.0, -3.0, 1.0), // eye position
+        zm.f32x4(0.0, 0.0, 0.0, 1.0), // focus point
+        zm.f32x4(0.0, 1.0, 0.0, 0.0), // up direction ('w' coord is zero because this is a vector not a point)
+    );
+    const view_to_clip = zm.perspectiveFovLh(0.25 * math.pi, aspect_ratio, 0.1, 20.0);
+
+    const object_to_view = zm.mul(object_to_world, world_to_view);
+    const object_to_clip = zm.mul(object_to_view, view_to_clip);
+    
+    // Transposition is needed because HLSL uses column-major matrices by default
+    const mem = allocateUploadMemory(...);
+    zm.storeMat(mem, zm.transpose(object_to_clip));
+    
+    // In HLSL: out_position_sv = mul(float4(in_position, 1.0), object_to_clip);
+    
+    //
+    // 'WASD' camera movement example
+    //
+    {
+        const speed = zm.f32x4s(10.0);
+        const delta_time = zm.f32x4s(demo.frame_stats.delta_time);
+        const transform = zm.mul(zm.rotationX(demo.camera.pitch), zm.rotationY(demo.camera.yaw));
+        var forward = zm.normalize3(zm.mul(zm.f32x4(0.0, 0.0, 1.0, 0.0), transform));
+
+        zm.storeArr3(&demo.camera.forward, forward);
+
+        const right = speed * delta_time * zm.normalize3(zm.cross3(zm.f32x4(0.0, 1.0, 0.0, 0.0), forward));
+        forward = speed * delta_time * forward;
+
+        var cam_pos = zm.loadArr3(demo.camera.position);
+
+        if (keyDown('W')) {
+            cam_pos += forward;
+        } else if (keyDown('S')) {
+            cam_pos -= forward;
+        }
+        if (keyDown('D')) {
+            cam_pos += right;
+        } else if (keyDown('A')) {
+            cam_pos -= right;
+        }
+
+        zm.storeArr3(&demo.camera.position, cam_pos);
+    }
+   
+    //
+    // SIMD wave equation solver example (works with vector width 4, 8 and 16)
+    // 'T' can be F32x4, F32x8 or F32x16
+    //
+    var z_index: i32 = 0;
+    while (z_index < grid_size) : (z_index += 1) {
+        const z = scale * @intToFloat(f32, z_index - grid_size / 2);
+        const vz = zm.splat(T, z);
+
+        var x_index: i32 = 0;
+        while (x_index < grid_size) : (x_index += zm.veclen(T)) {
+            const x = scale * @intToFloat(f32, x_index - grid_size / 2);
+            const vx = zm.splat(T, x) + voffset * zm.splat(T, scale);
+
+            const d = zm.sqrt(vx * vx + vz * vz);
+            const vy = zm.sin(d - vtime);
+
+            const index = @intCast(usize, x_index + z_index * grid_size);
+            zm.store(xslice[index..], vx, 0);
+            zm.store(yslice[index..], vy, 0);
+            zm.store(zslice[index..], vz, 0);
+        }
+    }
+}
+```
diff --git a/lib/zmath/build.zig b/lib/zmath/build.zig
new file mode 100644
index 0000000..ce21bc1
--- /dev/null
+++ b/lib/zmath/build.zig
@@ -0,0 +1,97 @@
+const std = @import("std");
+
+pub const Options = struct {
+    enable_cross_platform_determinism: bool = true, // published by `package` as a build option of the same name
+};
+
+pub const Package = struct {
+    options: Options, // the options `package` was called with
+    zmath: *std.Build.Module, // module created from src/main.zig
+    zmath_options: *std.Build.Module, // module generated from the options step
+    /// Registers both modules on `exe` under the names "zmath" and "zmath_options".
+    pub fn link(pkg: Package, exe: *std.Build.CompileStep) void {
+        exe.addModule("zmath", pkg.zmath);
+        exe.addModule("zmath_options", pkg.zmath_options);
+    }
+};
+
+/// Creates the `zmath` module plus the generated `zmath_options` module that
+/// carries `args.options`. The target and optimize mode are accepted but unused.
+pub fn package(
+    b: *std.Build,
+    _: std.zig.CrossTarget,
+    _: std.builtin.Mode,
+    args: struct {
+        options: Options = .{},
+    },
+) Package {
+    // Generated options module; handed to zmath below under the name "zmath_options".
+    const options_step = b.addOptions();
+    const determinism = args.options.enable_cross_platform_determinism;
+    options_step.addOption(bool, "enable_cross_platform_determinism", determinism);
+    const options_module = options_step.createModule();
+
+    // The library module, rooted at src/main.zig next to this build file.
+    const zmath_module = b.createModule(.{
+        .source_file = .{ .path = thisDir() ++ "/src/main.zig" },
+        .dependencies = &.{
+            .{ .name = "zmath_options", .module = options_module },
+        },
+    });
+
+    return Package{
+        .options = args.options,
+        .zmath = zmath_module,
+        .zmath_options = options_module,
+    };
+}
+
+/// Build entry point used when zmath is built on its own: registers the
+/// `test` and `benchmark` top-level steps.
+pub fn build(b: *std.Build) void {
+    const optimize_mode = b.standardOptimizeOption(.{});
+    const build_target = b.standardTargetOptions(.{});
+
+    b.step("test", "Run zmath tests").dependOn(runTests(b, optimize_mode, build_target));
+
+    b.step("benchmark", "Run zmath benchmarks").dependOn(runBenchmarks(b, build_target));
+}
+
+/// Builds the zmath tests rooted at src/main.zig and returns the step that runs them.
+pub fn runTests(
+    b: *std.Build,
+    optimize: std.builtin.Mode,
+    target: std.zig.CrossTarget,
+) *std.Build.Step {
+    const unit_tests = b.addTest(.{
+        .name = "zmath-tests",
+        .root_source_file = .{ .path = thisDir() ++ "/src/main.zig" },
+        .target = target,
+        .optimize = optimize,
+    });
+    // The tests are compiled from main.zig itself, so only the options module is attached.
+    unit_tests.addModule("zmath_options", package(b, target, optimize, .{}).zmath_options);
+
+    return &unit_tests.run().step;
+}
+
+/// Builds src/benchmark.zig in ReleaseFast and returns the step that runs it.
+pub fn runBenchmarks(
+    b: *std.Build,
+    target: std.zig.CrossTarget,
+) *std.Build.Step {
+    const bench_exe = b.addExecutable(.{
+        .name = "zmath-benchmarks",
+        .root_source_file = .{ .path = thisDir() ++ "/src/benchmark.zig" },
+        .target = target,
+        .optimize = .ReleaseFast,
+    });
+    // Expose the library to the benchmark under the import name "zmath".
+    bench_exe.addModule("zmath", package(b, target, .ReleaseFast, .{}).zmath);
+
+    return &bench_exe.run().step;
+}
+
+inline fn thisDir() []const u8 {
+    return comptime std.fs.path.dirname(@src().file) orelse "."; // directory part of this file's path, or "." when dirname yields null
+}
diff --git a/lib/zmath/src/benchmark.zig b/lib/zmath/src/benchmark.zig
new file mode 100644
index 0000000..136e29d
--- /dev/null
+++ b/lib/zmath/src/benchmark.zig
@@ -0,0 +1,469 @@
+// -------------------------------------------------------------------------------------------------
+// zmath - benchmarks
+// -------------------------------------------------------------------------------------------------
+// 'zig build benchmark' in the root project directory will build and run 'ReleaseFast' configuration.
+//
+// -------------------------------------------------------------------------------------------------
+// 'AMD Ryzen 9 3950X 16-Core Processor', Windows 11, Zig 0.10.0-dev.2620+0e9458a3f
+// -------------------------------------------------------------------------------------------------
+//                matrix mul benchmark (AOS) - scalar version: 1.5880s, zmath version: 1.0642s
+//       cross3, scale, bias benchmark (AOS) - scalar version: 0.9318s, zmath version: 0.6888s
+// cross3, dot3, scale, bias benchmark (AOS) - scalar version: 1.2258s, zmath version: 1.1095s
+//            quaternion mul benchmark (AOS) - scalar version: 1.4123s, zmath version: 0.6958s
+//                      wave benchmark (SOA) - scalar version: 4.8165s, zmath version: 0.7338s
+//
+// -------------------------------------------------------------------------------------------------
+// 'AMD Ryzen 7 5800X 8-Core Processor', Linux 5.17.14, Zig 0.10.0-dev.2624+d506275a0
+// -------------------------------------------------------------------------------------------------
+//                matrix mul benchmark (AOS) - scalar version: 1.3672s, zmath version: 0.8617s
+//       cross3, scale, bias benchmark (AOS) - scalar version: 0.6586s, zmath version: 0.4803s
+// cross3, dot3, scale, bias benchmark (AOS) - scalar version: 1.0620s, zmath version: 0.8942s
+//            quaternion mul benchmark (AOS) - scalar version: 1.1324s, zmath version: 0.6064s
+//                      wave benchmark (SOA) - scalar version: 3.6598s, zmath version: 0.4231s
+//
+// -------------------------------------------------------------------------------------------------
+// 'Apple M1 Max', macOS Version 12.4, Zig 0.10.0-dev.2657+74442f350
+// -------------------------------------------------------------------------------------------------
+//                matrix mul benchmark (AOS) - scalar version: 1.0297s, zmath version: 1.0538s
+//       cross3, scale, bias benchmark (AOS) - scalar version: 0.6294s, zmath version: 0.6532s
+// cross3, dot3, scale, bias benchmark (AOS) - scalar version: 0.9807s, zmath version: 1.0988s
+//            quaternion mul benchmark (AOS) - scalar version: 1.5413s, zmath version: 0.7800s
+//                      wave benchmark (SOA) - scalar version: 3.4220s, zmath version: 1.0255s
+//
+// -------------------------------------------------------------------------------------------------
+// '11th Gen Intel(R) Core(TM) i7-11800H @ 2.30GHz', Windows 11, Zig 0.10.0-dev.2620+0e9458a3f
+// -------------------------------------------------------------------------------------------------
+//                matrix mul benchmark (AOS) - scalar version: 2.2308s, zmath version: 0.9376s
+//       cross3, scale, bias benchmark (AOS) - scalar version: 1.0821s, zmath version: 0.5110s
+// cross3, dot3, scale, bias benchmark (AOS) - scalar version: 1.6580s, zmath version: 0.9167s
+//            quaternion mul benchmark (AOS) - scalar version: 2.0139s, zmath version: 0.5856s
+//                      wave benchmark (SOA) - scalar version: 3.7832s, zmath version: 0.3642s
+//
+// -------------------------------------------------------------------------------------------------
+
+pub fn main() !void { // Runs each benchmark once, in order; every benchmark prints its own timings.
+    var gpa = std.heap.GeneralPurposeAllocator(.{}){};
+    defer _ = gpa.deinit(); // the value returned by deinit is deliberately discarded
+    const allocator = gpa.allocator();
+
+    // m = mul(ma, mb); data set fits in L1 cache; AOS data layout.
+    try mat4MulBenchmark(allocator, 100_000); // second argument: number of timed passes over the data set
+
+    // v = 0.01 * cross3(va, vb) + vec3(1.0); data set fits in L1 cache; AOS data layout.
+    try cross3ScaleBiasBenchmark(allocator, 10_000);
+
+    // v = dot3(va, vb) * (0.1 * cross3(va, vb) + vec3(1.0)); data set fits in L1 cache; AOS data layout.
+    try cross3Dot3ScaleBiasBenchmark(allocator, 10_000);
+
+    // q = qmul(qa, qb); data set fits in L1 cache; AOS data layout.
+    try quatBenchmark(allocator, 10_000);
+
+    // d = sqrt(x * x + z * z); y = sin(d - t); SOA layout.
+    try waveBenchmark(allocator, 1_000); // the allocator is accepted but not used by this benchmark
+}
+
+const std = @import("std");
+const time = std.time;
+const Timer = time.Timer;
+const zm = @import("zmath");
+
+var prng = std.rand.DefaultPrng.init(0); // fixed seed (0)
+const random = prng.random(); // used to fill the benchmark input arrays below
+
+noinline fn mat4MulBenchmark(allocator: std.mem.Allocator, comptime count: comptime_int) !void { // Times scalar vs. zmath 4x4 matrix multiply: `count` passes over 64x64 matrix pairs.
+    std.debug.print("\n", .{});
+    std.debug.print("{s:>42} - ", .{"matrix mul benchmark (AOS)"});
+
+    var data0 = std.ArrayList([16]f32).init(allocator);
+    defer data0.deinit();
+    var data1 = std.ArrayList([16]f32).init(allocator);
+    defer data1.deinit();
+
+    var i: usize = 0;
+    while (i < 64) : (i += 1) { // 64 random matrices per list, each stored as 16 consecutive floats
+        try data0.append([16]f32{
+            random.float(f32), random.float(f32), random.float(f32), random.float(f32),
+            random.float(f32), random.float(f32), random.float(f32), random.float(f32),
+            random.float(f32), random.float(f32), random.float(f32), random.float(f32),
+            random.float(f32), random.float(f32), random.float(f32), random.float(f32),
+        });
+        try data1.append([16]f32{
+            random.float(f32), random.float(f32), random.float(f32), random.float(f32),
+            random.float(f32), random.float(f32), random.float(f32), random.float(f32),
+            random.float(f32), random.float(f32), random.float(f32), random.float(f32),
+            random.float(f32), random.float(f32), random.float(f32), random.float(f32),
+        });
+    }
+
+    // Warmup, fills L1 cache.
+    i = 0;
+    while (i < 100) : (i += 1) { // 100 untimed passes using the zmath code path
+        for (data1.items) |b| {
+            for (data0.items) |a| {
+                const ma = zm.loadMat(a[0..]);
+                const mb = zm.loadMat(b[0..]);
+                const r = zm.mul(ma, mb);
+                std.mem.doNotOptimizeAway(&r); // keep the result observable so the multiply is not removed
+            }
+        }
+    }
+
+    { // Scalar reference version.
+        i = 0;
+        var timer = try Timer.start();
+        const start = timer.lap(); // NOTE(review): lap() also resets the timer, so read() below is already relative to this point; confirm that subtracting `start` is intended
+        while (i < count) : (i += 1) {
+            for (data1.items) |b| {
+                for (data0.items) |a| {
+                    const r = [16]f32{ // r[4 * row + col] = sum over k of a[4 * row + k] * b[4 * k + col]
+                        a[0] * b[0] + a[1] * b[4] + a[2] * b[8] + a[3] * b[12],
+                        a[0] * b[1] + a[1] * b[5] + a[2] * b[9] + a[3] * b[13],
+                        a[0] * b[2] + a[1] * b[6] + a[2] * b[10] + a[3] * b[14],
+                        a[0] * b[3] + a[1] * b[7] + a[2] * b[11] + a[3] * b[15],
+                        a[4] * b[0] + a[5] * b[4] + a[6] * b[8] + a[7] * b[12],
+                        a[4] * b[1] + a[5] * b[5] + a[6] * b[9] + a[7] * b[13],
+                        a[4] * b[2] + a[5] * b[6] + a[6] * b[10] + a[7] * b[14],
+                        a[4] * b[3] + a[5] * b[7] + a[6] * b[11] + a[7] * b[15],
+                        a[8] * b[0] + a[9] * b[4] + a[10] * b[8] + a[11] * b[12],
+                        a[8] * b[1] + a[9] * b[5] + a[10] * b[9] + a[11] * b[13],
+                        a[8] * b[2] + a[9] * b[6] + a[10] * b[10] + a[11] * b[14],
+                        a[8] * b[3] + a[9] * b[7] + a[10] * b[11] + a[11] * b[15],
+                        a[12] * b[0] + a[13] * b[4] + a[14] * b[8] + a[15] * b[12],
+                        a[12] * b[1] + a[13] * b[5] + a[14] * b[9] + a[15] * b[13],
+                        a[12] * b[2] + a[13] * b[6] + a[14] * b[10] + a[15] * b[14],
+                        a[12] * b[3] + a[13] * b[7] + a[14] * b[11] + a[15] * b[15],
+                    };
+                    std.mem.doNotOptimizeAway(&r);
+                }
+            }
+        }
+        const end = timer.read();
+        const elapsed_s = @intToFloat(f64, end - start) / time.ns_per_s; // nanoseconds -> seconds
+
+        std.debug.print("scalar version: {d:.4}s, ", .{elapsed_s});
+    }
+
+    { // zmath version: same loop structure as the warm-up, but timed.
+        i = 0;
+        var timer = try Timer.start();
+        const start = timer.lap();
+        while (i < count) : (i += 1) {
+            for (data1.items) |b| {
+                for (data0.items) |a| {
+                    const ma = zm.loadMat(a[0..]);
+                    const mb = zm.loadMat(b[0..]);
+                    const r = zm.mul(ma, mb);
+                    std.mem.doNotOptimizeAway(&r);
+                }
+            }
+        }
+        const end = timer.read();
+        const elapsed_s = @intToFloat(f64, end - start) / time.ns_per_s; // nanoseconds -> seconds
+
+        std.debug.print("zmath version: {d:.4}s\n", .{elapsed_s});
+    }
+}
+
+noinline fn cross3ScaleBiasBenchmark(allocator: std.mem.Allocator, comptime count: comptime_int) !void { // Times scalar vs. zmath `0.01 * cross3(a, b) + 1.0`: `count` passes over 256x256 vector pairs.
+    std.debug.print("{s:>42} - ", .{"cross3, scale, bias benchmark (AOS)"});
+
+    var data0 = std.ArrayList([3]f32).init(allocator);
+    defer data0.deinit();
+    var data1 = std.ArrayList([3]f32).init(allocator);
+    defer data1.deinit();
+
+    var i: usize = 0;
+    while (i < 256) : (i += 1) { // 256 random 3-component vectors per list
+        try data0.append([3]f32{ random.float(f32), random.float(f32), random.float(f32) });
+        try data1.append([3]f32{ random.float(f32), random.float(f32), random.float(f32) });
+    }
+
+    // Warmup, fills L1 cache.
+    i = 0;
+    while (i < 100) : (i += 1) { // 100 untimed passes using the zmath code path
+        for (data1.items) |b| {
+            for (data0.items) |a| {
+                const va = zm.loadArr3(a);
+                const vb = zm.loadArr3(b);
+                const cp = zm.f32x4s(0.01) * zm.cross3(va, vb) + zm.f32x4s(1.0);
+                std.mem.doNotOptimizeAway(&cp); // keep the result observable so the computation is not removed
+            }
+        }
+    }
+
+    { // Scalar reference version.
+        i = 0;
+        var timer = try Timer.start();
+        const start = timer.lap(); // NOTE(review): lap() also resets the timer, so read() below is already relative to this point; confirm that subtracting `start` is intended
+        while (i < count) : (i += 1) {
+            for (data1.items) |b| {
+                for (data0.items) |a| {
+                    const r = [3]f32{ // cross product written out per component, then scaled by 0.01 and biased by 1.0
+                        0.01 * (a[1] * b[2] - a[2] * b[1]) + 1.0,
+                        0.01 * (a[2] * b[0] - a[0] * b[2]) + 1.0,
+                        0.01 * (a[0] * b[1] - a[1] * b[0]) + 1.0,
+                    };
+                    std.mem.doNotOptimizeAway(&r);
+                }
+            }
+        }
+        const end = timer.read();
+        const elapsed_s = @intToFloat(f64, end - start) / time.ns_per_s; // nanoseconds -> seconds
+
+        std.debug.print("scalar version: {d:.4}s, ", .{elapsed_s});
+    }
+
+    { // zmath version: same loop structure as the warm-up, but timed.
+        i = 0;
+        var timer = try Timer.start();
+        const start = timer.lap();
+        while (i < count) : (i += 1) {
+            for (data1.items) |b| {
+                for (data0.items) |a| {
+                    const va = zm.loadArr3(a);
+                    const vb = zm.loadArr3(b);
+                    const cp = zm.f32x4s(0.01) * zm.cross3(va, vb) + zm.f32x4s(1.0);
+                    std.mem.doNotOptimizeAway(&cp);
+                }
+            }
+        }
+        const end = timer.read();
+        const elapsed_s = @intToFloat(f64, end - start) / time.ns_per_s; // nanoseconds -> seconds
+
+        std.debug.print("zmath version: {d:.4}s\n", .{elapsed_s});
+    }
+}
+
+noinline fn cross3Dot3ScaleBiasBenchmark(allocator: std.mem.Allocator, comptime count: comptime_int) !void { // Times scalar vs. zmath `dot3(a, b) * (0.1 * cross3(a, b) + 1.0)`: `count` passes over 256x256 vector pairs.
+    std.debug.print("{s:>42} - ", .{"cross3, dot3, scale, bias benchmark (AOS)"});
+
+    var data0 = std.ArrayList([3]f32).init(allocator);
+    defer data0.deinit();
+    var data1 = std.ArrayList([3]f32).init(allocator);
+    defer data1.deinit();
+
+    var i: usize = 0;
+    while (i < 256) : (i += 1) { // 256 random 3-component vectors per list
+        try data0.append([3]f32{ random.float(f32), random.float(f32), random.float(f32) });
+        try data1.append([3]f32{ random.float(f32), random.float(f32), random.float(f32) });
+    }
+
+    // Warmup, fills L1 cache.
+    i = 0;
+    while (i < 100) : (i += 1) { // 100 untimed passes using the zmath code path
+        for (data1.items) |b| {
+            for (data0.items) |a| {
+                const va = zm.loadArr3(a);
+                const vb = zm.loadArr3(b);
+                const r = (zm.dot3(va, vb) * (zm.f32x4s(0.1) * zm.cross3(va, vb) + zm.f32x4s(1.0)))[0]; // warm-up keeps only lane 0; the timed zmath loop below keeps the whole vector
+                std.mem.doNotOptimizeAway(&r);
+            }
+        }
+    }
+
+    { // Scalar reference version.
+        i = 0;
+        var timer = try Timer.start();
+        const start = timer.lap(); // NOTE(review): lap() also resets the timer, so read() below is already relative to this point; confirm that subtracting `start` is intended
+        while (i < count) : (i += 1) {
+            for (data1.items) |b| {
+                for (data0.items) |a| {
+                    const d = a[0] * b[0] + a[1] * b[1] + a[2] * b[2]; // 3-component dot product
+                    const r = [3]f32{ // d * (0.1 * cross(a, b) + 1.0), per component
+                        d * (0.1 * (a[1] * b[2] - a[2] * b[1]) + 1.0),
+                        d * (0.1 * (a[2] * b[0] - a[0] * b[2]) + 1.0),
+                        d * (0.1 * (a[0] * b[1] - a[1] * b[0]) + 1.0),
+                    };
+                    std.mem.doNotOptimizeAway(&r);
+                }
+            }
+        }
+        const end = timer.read();
+        const elapsed_s = @intToFloat(f64, end - start) / time.ns_per_s; // nanoseconds -> seconds
+
+        std.debug.print("scalar version: {d:.4}s, ", .{elapsed_s});
+    }
+
+    { // zmath version, timed.
+        i = 0;
+        var timer = try Timer.start();
+        const start = timer.lap();
+        while (i < count) : (i += 1) {
+            for (data1.items) |b| {
+                for (data0.items) |a| {
+                    const va = zm.loadArr3(a);
+                    const vb = zm.loadArr3(b);
+                    const r = zm.dot3(va, vb) * (zm.f32x4s(0.1) * zm.cross3(va, vb) + zm.f32x4s(1.0));
+                    std.mem.doNotOptimizeAway(&r);
+                }
+            }
+        }
+        const end = timer.read();
+        const elapsed_s = @intToFloat(f64, end - start) / time.ns_per_s; // nanoseconds -> seconds
+
+        std.debug.print("zmath version: {d:.4}s\n", .{elapsed_s});
+    }
+}
+
+noinline fn quatBenchmark(allocator: std.mem.Allocator, comptime count: comptime_int) !void { // Times a scalar quaternion product vs. zm.qmul: `count` passes over 256x256 pairs.
+    std.debug.print("{s:>42} - ", .{"quaternion mul benchmark (AOS)"});
+
+    var data0 = std.ArrayList([4]f32).init(allocator);
+    defer data0.deinit();
+    var data1 = std.ArrayList([4]f32).init(allocator);
+    defer data1.deinit();
+
+    var i: usize = 0;
+    while (i < 256) : (i += 1) { // 256 random 4-component values per list (not normalized)
+        try data0.append([4]f32{ random.float(f32), random.float(f32), random.float(f32), random.float(f32) });
+        try data1.append([4]f32{ random.float(f32), random.float(f32), random.float(f32), random.float(f32) });
+    }
+
+    // Warmup, fills L1 cache.
+    i = 0;
+    while (i < 100) : (i += 1) { // 100 untimed passes using the zmath code path
+        for (data1.items) |b| {
+            for (data0.items) |a| {
+                const va = zm.loadArr4(a);
+                const vb = zm.loadArr4(b);
+                const r = zm.qmul(va, vb);
+                std.mem.doNotOptimizeAway(&r); // keep the result observable so the computation is not removed
+            }
+        }
+    }
+
+    { // Scalar reference version.
+        i = 0;
+        var timer = try Timer.start();
+        const start = timer.lap(); // NOTE(review): lap() also resets the timer, so read() below is already relative to this point; confirm that subtracting `start` is intended
+        while (i < count) : (i += 1) {
+            for (data1.items) |b| {
+                for (data0.items) |a| {
+                    const r = [4]f32{ // hand-expanded product; presumably mirrors zm.qmul(a, b) with w stored at index 3 (verify against zmath.zig)
+                        (b[3] * a[0]) + (b[0] * a[3]) + (b[1] * a[2]) - (b[2] * a[1]),
+                        (b[3] * a[1]) - (b[0] * a[2]) + (b[1] * a[3]) + (b[2] * a[0]),
+                        (b[3] * a[2]) + (b[0] * a[1]) - (b[1] * a[0]) + (b[2] * a[3]),
+                        (b[3] * a[3]) - (b[0] * a[0]) - (b[1] * a[1]) - (b[2] * a[2]),
+                    };
+                    std.mem.doNotOptimizeAway(&r);
+                }
+            }
+        }
+        const end = timer.read();
+        const elapsed_s = @intToFloat(f64, end - start) / time.ns_per_s; // nanoseconds -> seconds
+
+        std.debug.print("scalar version: {d:.4}s, ", .{elapsed_s});
+    }
+
+    { // zmath version: same loop structure as the warm-up, but timed.
+        i = 0;
+        var timer = try Timer.start();
+        const start = timer.lap();
+        while (i < count) : (i += 1) {
+            for (data1.items) |b| {
+                for (data0.items) |a| {
+                    const va = zm.loadArr4(a);
+                    const vb = zm.loadArr4(b);
+                    const r = zm.qmul(va, vb);
+                    std.mem.doNotOptimizeAway(&r);
+                }
+            }
+        }
+        const end = timer.read();
+        const elapsed_s = @intToFloat(f64, end - start) / time.ns_per_s; // nanoseconds -> seconds
+
+        std.debug.print("zmath version: {d:.4}s\n", .{elapsed_s});
+    }
+}
+
+noinline fn waveBenchmark(allocator: std.mem.Allocator, comptime count: comptime_int) !void { // Times y = sin(sqrt(x*x + z*z) - t) over a 1024x1024 grid, `count` passes: scalar vs. 16-wide vectors.
+    _ = allocator; // unused: nothing in this benchmark allocates
+    std.debug.print("{s:>42} - ", .{"wave benchmark (SOA)"});
+
+    const grid_size = 1024;
+    { // Scalar version.
+        var t: f32 = 0.0;
+
+        const scale: f32 = 0.05;
+
+        var timer = try Timer.start();
+        const start = timer.lap(); // NOTE(review): lap() also resets the timer, so read() below is already relative to this point; confirm that subtracting `start` is intended
+
+        var iter: usize = 0;
+        while (iter < count) : (iter += 1) {
+            var z_index: i32 = 0;
+            while (z_index < grid_size) : (z_index += 1) {
+                const z = scale * @intToFloat(f32, z_index - grid_size / 2); // grid coordinate centred on 0
+
+                var x_index: i32 = 0;
+                while (x_index < grid_size) : (x_index += 4) { // four consecutive x samples per iteration, each computed with scalar f32 math
+                    const x0 = scale * @intToFloat(f32, x_index + 0 - grid_size / 2);
+                    const x1 = scale * @intToFloat(f32, x_index + 1 - grid_size / 2);
+                    const x2 = scale * @intToFloat(f32, x_index + 2 - grid_size / 2);
+                    const x3 = scale * @intToFloat(f32, x_index + 3 - grid_size / 2);
+
+                    const d0 = zm.sqrt(x0 * x0 + z * z);
+                    const d1 = zm.sqrt(x1 * x1 + z * z);
+                    const d2 = zm.sqrt(x2 * x2 + z * z);
+                    const d3 = zm.sqrt(x3 * x3 + z * z);
+
+                    const y0 = zm.sin(d0 - t);
+                    const y1 = zm.sin(d1 - t);
+                    const y2 = zm.sin(d2 - t);
+                    const y3 = zm.sin(d3 - t);
+
+                    std.mem.doNotOptimizeAway(&y0); // keep each result observable so the computation is not removed
+                    std.mem.doNotOptimizeAway(&y1);
+                    std.mem.doNotOptimizeAway(&y2);
+                    std.mem.doNotOptimizeAway(&y3);
+                }
+            }
+            t += 0.001; // advance the time offset once per full grid pass
+        }
+        const end = timer.read();
+        const elapsed_s = @intToFloat(f64, end - start) / time.ns_per_s; // nanoseconds -> seconds
+
+        std.debug.print("scalar version: {d:.4}s, ", .{elapsed_s});
+    }
+
+    { // Vector version using zm.F32x16.
+        const T = zm.F32x16;
+
+        const static = struct {
+            const offsets = [16]f32{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }; // per-lane x index offsets
+        };
+        const voffset = zm.load(static.offsets[0..], T, 0);
+        var vt = zm.splat(T, 0.0);
+
+        const scale: f32 = 0.05;
+
+        var timer = try Timer.start();
+        const start = timer.lap();
+
+        var iter: usize = 0;
+        while (iter < count) : (iter += 1) {
+            var z_index: i32 = 0;
+            while (z_index < grid_size) : (z_index += 1) {
+                const z = scale * @intToFloat(f32, z_index - grid_size / 2);
+                const vz = zm.splat(T, z);
+
+                var x_index: i32 = 0;
+                while (x_index < grid_size) : (x_index += zm.veclen(T)) { // step by the lane count of T
+                    const x = scale * @intToFloat(f32, x_index - grid_size / 2);
+                    const vx = zm.splat(T, x) + voffset * zm.splat(T, scale); // lane k holds x + k * scale
+
+                    const d = zm.sqrt(vx * vx + vz * vz);
+
+                    const vy = zm.sin(d - vt);
+
+                    std.mem.doNotOptimizeAway(&vy);
+                }
+            }
+            vt += zm.splat(T, 0.001); // advance the time offset once per full grid pass
+        }
+        const end = timer.read();
+        const elapsed_s = @intToFloat(f64, end - start) / time.ns_per_s; // nanoseconds -> seconds
+
+        std.debug.print("zmath version: {d:.4}s\n", .{elapsed_s});
+    }
+}
diff --git a/lib/zmath/src/main.zig b/lib/zmath/src/main.zig
new file mode 100644
index 0000000..5834745
--- /dev/null
+++ b/lib/zmath/src/main.zig
@@ -0,0 +1,18 @@
+//--------------------------------------------------------------------------------------------------
+//
+// SIMD math library for game developers
+// https://github.com/michal-z/zig-gamedev/tree/main/libs/zmath
+//
+// See zmath.zig for more details.
+// See util.zig for additional functionality.
+//
+//--------------------------------------------------------------------------------------------------
+pub const version = @import("std").SemanticVersion{ .major = 0, .minor = 9, .patch = 6 }; // library version 0.9.6
+
+pub usingnamespace @import("zmath.zig"); // re-exports the public declarations of zmath.zig from this file
+pub const util = @import("util.zig");
+
+// ensure transitive closure of test coverage
+comptime {
+    _ = util; // comptime reference to util; per the comment above, meant to pull util.zig's tests into the test build
+}
diff --git a/lib/zmath/src/util.zig b/lib/zmath/src/util.zig
new file mode 100644
index 0000000..aa79020
--- /dev/null
+++ b/lib/zmath/src/util.zig
@@ -0,0 +1,182 @@
+// ==============================================================================
+//
+// Collection of useful functions building on top of, and extending, core zmath.
+// https://github.com/michal-z/zig-gamedev/tree/main/libs/zmath
+//
+// ------------------------------------------------------------------------------
+// 1. Matrix functions
+// ------------------------------------------------------------------------------
+//
+// As an example, in a left handed Y-up system:
+//   getAxisX is equivalent to the right vector
+//   getAxisY is equivalent to the up vector
+//   getAxisZ is equivalent to the forward vector
+//
+// getTranslationVec(m: Mat) Vec
+// getAxisX(m: Mat) Vec
+// getAxisY(m: Mat) Vec
+// getAxisZ(m: Mat) Vec
+//
+// ==============================================================================
+
+const zm = @import("zmath.zig");
+const std = @import("std");
+const math = std.math;
+const expect = std.testing.expect;
+
+pub fn getTranslationVec(m: zm.Mat) zm.Vec { // Returns row 3 of `m` with its last component set to 0.
+    var translation = m[3];
+    translation[3] = 0; // clear w so only x, y, z carry the translation
+    return translation;
+}
+
+pub fn getScaleVec(m: zm.Mat) zm.Vec { // Returns (|col0|, |col1|, |col2|, 0): the lengths of the first three columns of the upper 3x3.
+    const scale_x = zm.length3(zm.f32x4(m[0][0], m[1][0], m[2][0], 0))[0]; // [0]: take lane 0 of length3's vector result
+    const scale_y = zm.length3(zm.f32x4(m[0][1], m[1][1], m[2][1], 0))[0];
+    const scale_z = zm.length3(zm.f32x4(m[0][2], m[1][2], m[2][2], 0))[0];
+    return zm.f32x4(scale_x, scale_y, scale_z, 0);
+}
+
+pub fn getRotationQuat(_m: zm.Mat) zm.Quat { // Returns zm.quatFromMat of `_m` after normalizing the three columns of its upper 3x3.
+    // Normalize each column of the upper 3x3 to unit length (the columns are not re-orthogonalized).
+    const c1 = zm.normalize3(zm.f32x4(_m[0][0], _m[1][0], _m[2][0], 0));
+    const c2 = zm.normalize3(zm.f32x4(_m[0][1], _m[1][1], _m[2][1], 0));
+    const c3 = zm.normalize3(zm.f32x4(_m[0][2], _m[1][2], _m[2][2], 0));
+    var m = _m; // mutable copy; row 3 and the last column are left as they were in `_m`
+    m[0][0] = c1[0];
+    m[1][0] = c1[1];
+    m[2][0] = c1[2];
+    m[0][1] = c2[0];
+    m[1][1] = c2[1];
+    m[2][1] = c2[2];
+    m[0][2] = c3[0];
+    m[1][2] = c3[1];
+    m[2][2] = c3[2];
+
+    // Extract rotation
+    return zm.quatFromMat(m);
+}
+
+pub fn getAxisX(m: zm.Mat) zm.Vec { // Returns the normalized x, y, z of row 0 of `m`, with w = 0.
+    return zm.normalize3(zm.f32x4(m[0][0], m[0][1], m[0][2], 0.0));
+}
+
+pub fn getAxisY(m: zm.Mat) zm.Vec { // Returns the normalized x, y, z of row 1 of `m`, with w = 0.
+    return zm.normalize3(zm.f32x4(m[1][0], m[1][1], m[1][2], 0.0));
+}
+
+pub fn getAxisZ(m: zm.Mat) zm.Vec { // Returns the normalized x, y, z of row 2 of `m`, with w = 0.
+    return zm.normalize3(zm.f32x4(m[2][0], m[2][1], m[2][2], 0.0));
+}
+
+test "zmath.util.mat.translation" { // getTranslationVec yields the last row's x, y, z with w cleared.
+    // zig fmt: off
+    const mat_data = [18]f32{
+        1.0,
+        2.0, 3.0, 4.0, 5.0,
+        6.0, 7.0, 8.0, 9.0,
+        10.0,11.0, 12.0,13.0,
+        14.0, 15.0, 16.0, 17.0,
+        18.0,
+    };
+    // zig fmt: on
+    const mat = zm.loadMat(mat_data[1..]); // slice starts at 2.0, so the leading 1.0 is not part of the matrix
+    const translation = getTranslationVec(mat);
+    try expect(zm.approxEqAbs(translation, zm.f32x4(14.0, 15.0, 16.0, 0.0), 0.0001)); // 17.0 (w) must come back as 0
+}
+
+test "zmath.util.mat.scale" { // getScaleVec recovers (3, 4, 5) from a scale-then-translate matrix.
+    const mat = zm.mul(zm.scaling(3, 4, 5), zm.translation(6, 7, 8));
+    const scale = getScaleVec(mat);
+    try expect(zm.approxEqAbs(scale, zm.f32x4(3.0, 4.0, 5.0, 0.0), 0.0001)); // the translation must not affect the result
+}
+
+test "zmath.util.mat.rotation" { // getRotationQuat recovers the rotation from a rotate * scale * translate matrix.
+    const rotate_origin = zm.matFromRollPitchYaw(0.1, 1.2, 2.3);
+    const mat = zm.mul(zm.mul(rotate_origin, zm.scaling(3, 4, 5)), zm.translation(6, 7, 8));
+    const rotate_get = getRotationQuat(mat);
+    const v0 = zm.mul(zm.f32x4s(1), rotate_origin); // probe vector (1, 1, 1, 1) through the original rotation
+    const v1 = zm.mul(zm.f32x4s(1), zm.quatToMat(rotate_get)); // same probe through the recovered rotation
+    try expect(zm.approxEqAbs(v0, v1, 0.0001));
+}
+
+test "zmath.util.mat.z_vec" { // getAxisZ: (0, 0, 1) for identity, (1, 0, 0) after a 90 degree rotationY.
+    const degToRad = std.math.degreesToRadians;
+    var identity = zm.identity(); // reassigned below, so the name no longer describes its value after the rotation
+    var z_vec = getAxisZ(identity);
+    try expect(zm.approxEqAbs(z_vec, zm.f32x4(0.0, 0.0, 1.0, 0), 0.0001));
+    const rot_yaw = zm.rotationY(degToRad(f32, 90));
+    identity = zm.mul(identity, rot_yaw);
+    z_vec = getAxisZ(identity);
+    try expect(zm.approxEqAbs(z_vec, zm.f32x4(1.0, 0.0, 0.0, 0), 0.0001));
+}
+
+test "zmath.util.mat.y_vec" { // getAxisY: unchanged by rotationY, then (0, 0, 1) after an additional 90 degree rotationX.
+    const degToRad = std.math.degreesToRadians;
+    var identity = zm.identity(); // reassigned below as rotations are accumulated
+    var y_vec = getAxisY(identity);
+    try expect(zm.approxEqAbs(y_vec, zm.f32x4(0.0, 1.0, 0.0, 0), 0.01));
+    const rot_yaw = zm.rotationY(degToRad(f32, 90));
+    identity = zm.mul(identity, rot_yaw);
+    y_vec = getAxisY(identity);
+    try expect(zm.approxEqAbs(y_vec, zm.f32x4(0.0, 1.0, 0.0, 0), 0.01)); // still (0, 1, 0) after the rotationY
+    const rot_pitch = zm.rotationX(degToRad(f32, 90));
+    identity = zm.mul(identity, rot_pitch);
+    y_vec = getAxisY(identity);
+    try expect(zm.approxEqAbs(y_vec, zm.f32x4(0.0, 0.0, 1.0, 0), 0.01));
+}
+
+test "zmath.util.mat.right" { // getAxisX: (1, 0, 0) for identity, (0, 0, -1) after rotationY(90), (0, 1, 0) after a further rotationX(90).
+    const degToRad = std.math.degreesToRadians;
+    var identity = zm.identity(); // reassigned below as rotations are accumulated
+    var right = getAxisX(identity);
+    try expect(zm.approxEqAbs(right, zm.f32x4(1.0, 0.0, 0.0, 0), 0.01));
+    const rot_yaw = zm.rotationY(degToRad(f32, 90));
+    identity = zm.mul(identity, rot_yaw);
+    right = getAxisX(identity);
+    try expect(zm.approxEqAbs(right, zm.f32x4(0.0, 0.0, -1.0, 0), 0.01));
+    const rot_pitch = zm.rotationX(degToRad(f32, 90));
+    identity = zm.mul(identity, rot_pitch);
+    right = getAxisX(identity);
+    try expect(zm.approxEqAbs(right, zm.f32x4(0.0, 1.0, 0.0, 0), 0.01));
+}
+
+// ------------------------------------------------------------------------------
+// This software is available under 2 licenses -- choose whichever you prefer.
+// ------------------------------------------------------------------------------
+// ALTERNATIVE A - MIT License
+// Copyright (c) 2022 Michal Ziulek and Contributors
+// Permission is hereby granted, free of charge, to any person obtaining a copy of
+// this software and associated documentation files (the "Software"), to deal in
+// the Software without restriction, including without limitation the rights to
+// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+// of the Software, and to permit persons to whom the Software is furnished to do
+// so, subject to the following conditions:
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+// ------------------------------------------------------------------------------
+// ALTERNATIVE B - Public Domain (www.unlicense.org)
+// This is free and unencumbered software released into the public domain.
+// Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
+// software, either in source code form or as a compiled binary, for any purpose,
+// commercial or non-commercial, and by any means.
+// In jurisdictions that recognize copyright laws, the author or authors of this
+// software dedicate any and all copyright interest in the software to the public
+// domain. We make this dedication for the benefit of the public at large and to
+// the detriment of our heirs and successors. We intend this dedication to be an
+// overt act of relinquishment in perpetuity of all present and future rights to
+// this software under copyright law.
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+// ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+// ------------------------------------------------------------------------------
diff --git a/lib/zmath/src/zmath.zig b/lib/zmath/src/zmath.zig
new file mode 100644
index 0000000..383ed55
--- /dev/null
+++ b/lib/zmath/src/zmath.zig
@@ -0,0 +1,4442 @@
+// ==============================================================================
+//
+// SIMD math library for game developers
+// https://github.com/michal-z/zig-gamedev/tree/main/libs/zmath
+//
+// Should work on all OSes supported by Zig. Works on x86_64 and ARM.
+// Provides ~140 optimized routines and ~70 extensive tests.
+// Can be used with any graphics API.
+//
+// zmath uses row-major matrices, row vectors (each row vector is stored in a SIMD register).
+// Handedness is determined by which function version is used (Rh vs. Lh),
+// otherwise the function works with either left-handed or right-handed view coordinates.
+//
+// const va = f32x4(1.0, 2.0, 3.0, 1.0);
+// const vb = f32x4(-1.0, 1.0, -1.0, 1.0);
+// const v0 = va + vb - f32x4(0.0, 1.0, 0.0, 1.0) * f32x4s(3.0);
+// const v1 = cross3(va, vb) + f32x4(1.0, 1.0, 1.0, 1.0);
+// const v2 = va + dot3(va, vb) / v1; // dotN() returns scalar replicated on all vector components
+//
+// const m = rotationX(math.pi * 0.25);
+// const v = f32x4(...);
+// const v0 = mul(v, m); // 'v' treated as a row vector
+// const v1 = mul(m, v); // 'v' treated as a column vector
+// const f = m[row][column];
+//
+// const b = va < vb;
+// if (all(b, 0)) { ... } // '0' means check all vector components; if all are 'true'
+// if (all(b, 3)) { ... } // '3' means check first three vector components; if all first three are 'true'
+// if (any(b, 0)) { ... } // '0' means check all vector components; if any is 'true'
+// if (any(b, 3)) { ... } // '3' means check first three vector components; if any from first three is 'true'
+//
+// var v4 = load(mem[0..], F32x4, 0);
+// var v8 = load(mem[100..], F32x8, 0);
+// var v16 = load(mem[200..], F32x16, 0);
+//
+// var camera_position = [3]f32{ 1.0, 2.0, 3.0 };
+// var cam_pos = loadArr3(camera_position);
+// ...
+// storeArr3(&camera_position, cam_pos);
+//
+// v4 = sin(v4); // SIMDx4
+// v8 = cos(v8); // .x86_64 -> 2 x SIMDx4, .x86_64+avx+fma -> SIMDx8
+// v16 = atan(v16); // .x86_64 -> 4 x SIMDx4, .x86_64+avx+fma -> 2 x SIMDx8, .x86_64+avx512f -> SIMDx16
+//
+// store(mem[0..], v4, 0);
+// store(mem[100..], v8, 0);
+// store(mem[200..], v16, 0);
+//
+// ------------------------------------------------------------------------------
+// 1. Initialization functions
+// ------------------------------------------------------------------------------
+//
+// f32x4(e0: f32, e1: f32, e2: f32, e3: f32) F32x4
+// f32x8(e0: f32, e1: f32, e2: f32, e3: f32, e4: f32, e5: f32, e6: f32, e7: f32) F32x8
+// f32x16(e0: f32, e1: f32, e2: f32, e3: f32, e4: f32, e5: f32, e6: f32, e7: f32,
+//        e8: f32, e9: f32, ea: f32, eb: f32, ec: f32, ed: f32, ee: f32, ef: f32) F32x16
+//
+// f32x4s(e0: f32) F32x4
+// f32x8s(e0: f32) F32x8
+// f32x16s(e0: f32) F32x16
+//
+// boolx4(e0: bool, e1: bool, e2: bool, e3: bool) Boolx4
+// boolx8(e0: bool, e1: bool, e2: bool, e3: bool, e4: bool, e5: bool, e6: bool, e7: bool) Boolx8
+// boolx16(e0: bool, e1: bool, e2: bool, e3: bool, e4: bool, e5: bool, e6: bool, e7: bool,
+//         e8: bool, e9: bool, ea: bool, eb: bool, ec: bool, ed: bool, ee: bool, ef: bool) Boolx16
+//
+// load(mem: []const f32, comptime T: type, comptime len: u32) T
+// store(mem: []f32, v: anytype, comptime len: u32) void
+//
+// loadArr2(arr: [2]f32) F32x4
+// loadArr2zw(arr: [2]f32, z: f32, w: f32) F32x4
+// loadArr3(arr: [3]f32) F32x4
+// loadArr3w(arr: [3]f32, w: f32) F32x4
+// loadArr4(arr: [4]f32) F32x4
+//
+// storeArr2(arr: *[2]f32, v: F32x4) void
+// storeArr3(arr: *[3]f32, v: F32x4) void
+// storeArr4(arr: *[4]f32, v: F32x4) void
+//
+// arr3Ptr(ptr: anytype) *const [3]f32
+// arrNPtr(ptr: anytype) [*]const f32
+//
+// splat(comptime T: type, value: f32) T
+// splatInt(comptime T: type, value: u32) T
+//
+// ------------------------------------------------------------------------------
+// 2. Functions that work on all vector components (F32xN = F32x4 or F32x8 or F32x16)
+// ------------------------------------------------------------------------------
+//
+// all(vb: anytype, comptime len: u32) bool
+// any(vb: anytype, comptime len: u32) bool
+//
+// isNearEqual(v0: F32xN, v1: F32xN, epsilon: F32xN) BoolxN
+// isNan(v: F32xN) BoolxN
+// isInf(v: F32xN) BoolxN
+// isInBounds(v: F32xN, bounds: F32xN) BoolxN
+//
+// andInt(v0: F32xN, v1: F32xN) F32xN
+// andNotInt(v0: F32xN, v1: F32xN) F32xN
+// orInt(v0: F32xN, v1: F32xN) F32xN
+// norInt(v0: F32xN, v1: F32xN) F32xN
+// xorInt(v0: F32xN, v1: F32xN) F32xN
+//
+// minFast(v0: F32xN, v1: F32xN) F32xN
+// maxFast(v0: F32xN, v1: F32xN) F32xN
+// min(v0: F32xN, v1: F32xN) F32xN
+// max(v0: F32xN, v1: F32xN) F32xN
+// round(v: F32xN) F32xN
+// floor(v: F32xN) F32xN
+// trunc(v: F32xN) F32xN
+// ceil(v: F32xN) F32xN
+// clamp(v0: F32xN, v1: F32xN) F32xN
+// clampFast(v0: F32xN, v1: F32xN) F32xN
+// saturate(v: F32xN) F32xN
+// saturateFast(v: F32xN) F32xN
+// lerp(v0: F32xN, v1: F32xN, t: f32) F32xN
+// lerpV(v0: F32xN, v1: F32xN, t: F32xN) F32xN
+// lerpInverse(v0: F32xN, v1: F32xN, t: f32) F32xN
+// lerpInverseV(v0: F32xN, v1: F32xN, t: F32xN) F32xN
+// mapLinear(v: F32xN, min1: f32, max1: f32, min2: f32, max2: f32) F32xN
+// mapLinearV(v: F32xN, min1: F32xN, max1: F32xN, min2: F32xN, max2: F32xN) F32xN
+// sqrt(v: F32xN) F32xN
+// abs(v: F32xN) F32xN
+// mod(v0: F32xN, v1: F32xN) F32xN
+// modAngle(v: F32xN) F32xN
+// mulAdd(v0: F32xN, v1: F32xN, v2: F32xN) F32xN
+// select(mask: BoolxN, v0: F32xN, v1: F32xN) F32xN
+// sin(v: F32xN) F32xN
+// cos(v: F32xN) F32xN
+// sincos(v: F32xN) [2]F32xN
+// asin(v: F32xN) F32xN
+// acos(v: F32xN) F32xN
+// atan(v: F32xN) F32xN
+// atan2(vy: F32xN, vx: F32xN) F32xN
+// cmulSoa(re0: F32xN, im0: F32xN, re1: F32xN, im1: F32xN) [2]F32xN
+//
+// ------------------------------------------------------------------------------
+// 3. 2D, 3D, 4D vector functions
+// ------------------------------------------------------------------------------
+//
+// swizzle(v: Vec, c, c, c, c) Vec (comptime c = .x | .y | .z | .w)
+// dot2(v0: Vec, v1: Vec) F32x4
+// dot3(v0: Vec, v1: Vec) F32x4
+// dot4(v0: Vec, v1: Vec) F32x4
+// cross3(v0: Vec, v1: Vec) Vec
+// lengthSq2(v: Vec) F32x4
+// lengthSq3(v: Vec) F32x4
+// lengthSq4(v: Vec) F32x4
+// length2(v: Vec) F32x4
+// length3(v: Vec) F32x4
+// length4(v: Vec) F32x4
+// normalize2(v: Vec) Vec
+// normalize3(v: Vec) Vec
+// normalize4(v: Vec) Vec
+//
+// vecToArr2(v: Vec) [2]f32
+// vecToArr3(v: Vec) [3]f32
+// vecToArr4(v: Vec) [4]f32
+//
+// ------------------------------------------------------------------------------
+// 4. Matrix functions
+// ------------------------------------------------------------------------------
+//
+// identity() Mat
+// mul(m0: Mat, m1: Mat) Mat
+// mul(s: f32, m: Mat) Mat
+// mul(m: Mat, s: f32) Mat
+// mul(v: Vec, m: Mat) Vec
+// mul(m: Mat, v: Vec) Vec
+// transpose(m: Mat) Mat
+// rotationX(angle: f32) Mat
+// rotationY(angle: f32) Mat
+// rotationZ(angle: f32) Mat
+// translation(x: f32, y: f32, z: f32) Mat
+// translationV(v: Vec) Mat
+// scaling(x: f32, y: f32, z: f32) Mat
+// scalingV(v: Vec) Mat
+// lookToLh(eyepos: Vec, eyedir: Vec, updir: Vec) Mat
+// lookAtLh(eyepos: Vec, focuspos: Vec, updir: Vec) Mat
+// lookToRh(eyepos: Vec, eyedir: Vec, updir: Vec) Mat
+// lookAtRh(eyepos: Vec, focuspos: Vec, updir: Vec) Mat
+// perspectiveFovLh(fovy: f32, aspect: f32, near: f32, far: f32) Mat
+// perspectiveFovRh(fovy: f32, aspect: f32, near: f32, far: f32) Mat
+// perspectiveFovLhGl(fovy: f32, aspect: f32, near: f32, far: f32) Mat
+// perspectiveFovRhGl(fovy: f32, aspect: f32, near: f32, far: f32) Mat
+// orthographicLh(w: f32, h: f32, near: f32, far: f32) Mat
+// orthographicRh(w: f32, h: f32, near: f32, far: f32) Mat
+// orthographicLhGl(w: f32, h: f32, near: f32, far: f32) Mat
+// orthographicRhGl(w: f32, h: f32, near: f32, far: f32) Mat
+// orthographicOffCenterLh(left: f32, right: f32, top: f32, bottom: f32, near: f32, far: f32) Mat
+// orthographicOffCenterRh(left: f32, right: f32, top: f32, bottom: f32, near: f32, far: f32) Mat
+// orthographicOffCenterLhGl(left: f32, right: f32, top: f32, bottom: f32, near: f32, far: f32) Mat
+// orthographicOffCenterRhGl(left: f32, right: f32, top: f32, bottom: f32, near: f32, far: f32) Mat
+// determinant(m: Mat) F32x4
+// inverse(m: Mat) Mat
+// inverseDet(m: Mat, det: ?*F32x4) Mat
+// matToQuat(m: Mat) Quat
+// matFromAxisAngle(axis: Vec, angle: f32) Mat
+// matFromNormAxisAngle(axis: Vec, angle: f32) Mat
+// matFromQuat(quat: Quat) Mat
+// matFromRollPitchYaw(pitch: f32, yaw: f32, roll: f32) Mat
+// matFromRollPitchYawV(angles: Vec) Mat
+//
+// loadMat(mem: []const f32) Mat
+// loadMat43(mem: []const f32) Mat
+// loadMat34(mem: []const f32) Mat
+// storeMat(mem: []f32, m: Mat) void
+// storeMat43(mem: []f32, m: Mat) void
+// storeMat34(mem: []f32, m: Mat) void
+//
+// matToArr(m: Mat) [16]f32
+// matToArr43(m: Mat) [12]f32
+// matToArr34(m: Mat) [12]f32
+//
+// ------------------------------------------------------------------------------
+// 5. Quaternion functions
+// ------------------------------------------------------------------------------
+//
+// qmul(q0: Quat, q1: Quat) Quat
+// qidentity() Quat
+// conjugate(quat: Quat) Quat
+// inverse(q: Quat) Quat
+// slerp(q0: Quat, q1: Quat, t: f32) Quat
+// slerpV(q0: Quat, q1: Quat, t: F32x4) Quat
+// quatToMat(quat: Quat) Mat
+// quatToAxisAngle(quat: Quat, axis: *Vec, angle: *f32) void
+// quatFromMat(m: Mat) Quat
+// quatFromAxisAngle(axis: Vec, angle: f32) Quat
+// quatFromNormAxisAngle(axis: Vec, angle: f32) Quat
+// quatFromRollPitchYaw(pitch: f32, yaw: f32, roll: f32) Quat
+// quatFromRollPitchYawV(angles: Vec) Quat
+//
+// ------------------------------------------------------------------------------
+// 6. Color functions
+// ------------------------------------------------------------------------------
+//
+// adjustSaturation(color: F32x4, saturation: f32) F32x4
+// adjustContrast(color: F32x4, contrast: f32) F32x4
+// rgbToHsl(rgb: F32x4) F32x4
+// hslToRgb(hsl: F32x4) F32x4
+// rgbToHsv(rgb: F32x4) F32x4
+// hsvToRgb(hsv: F32x4) F32x4
+// rgbToSrgb(rgb: F32x4) F32x4
+// srgbToRgb(srgb: F32x4) F32x4
+//
+// ------------------------------------------------------------------------------
+// X. Misc functions
+// ------------------------------------------------------------------------------
+//
+// linePointDistance(linept0: Vec, linept1: Vec, pt: Vec) F32x4
+// sin(v: f32) f32
+// cos(v: f32) f32
+// sincos(v: f32) [2]f32
+// asin(v: f32) f32
+// acos(v: f32) f32
+//
+// fftInitUnityTable(unitytable: []F32x4) void
+// fft(re: []F32x4, im: []F32x4, unitytable: []const F32x4) void
+// ifft(re: []F32x4, im: []const F32x4, unitytable: []const F32x4) void
+//
+// ==============================================================================
+
+// Fundamental types
+pub const F32x4 = @Vector(4, f32);
+pub const F32x8 = @Vector(8, f32);
+pub const F32x16 = @Vector(16, f32);
+pub const Boolx4 = @Vector(4, bool);
+pub const Boolx8 = @Vector(8, bool);
+pub const Boolx16 = @Vector(16, bool);
+
+// "Higher-level" aliases
+pub const Vec = F32x4;
+pub const Mat = [4]F32x4;
+pub const Quat = F32x4;
+
+const builtin = @import("builtin");
+const std = @import("std");
+const math = std.math;
+const assert = std.debug.assert;
+const expect = std.testing.expect;
+
+const cpu_arch = builtin.cpu.arch;
+const has_avx = if (cpu_arch == .x86_64) std.Target.x86.featureSetHas(builtin.cpu.features, .avx) else false;
+const has_avx512f = if (cpu_arch == .x86_64) std.Target.x86.featureSetHas(builtin.cpu.features, .avx512f) else false;
+const has_fma = if (cpu_arch == .x86_64) std.Target.x86.featureSetHas(builtin.cpu.features, .fma) else false;
+// ------------------------------------------------------------------------------
+//
+// 1. Initialization functions
+//
+// ------------------------------------------------------------------------------
+/// Packs four scalars into an `F32x4`; `e0` lands in lane 0 and `e3` in lane 3.
+pub inline fn f32x4(e0: f32, e1: f32, e2: f32, e3: f32) F32x4 {
+    return F32x4{ e0, e1, e2, e3 };
+}
+/// Packs eight scalars into an `F32x8` in argument order (`e0` is lane 0, `e7` is lane 7).
+pub inline fn f32x8(e0: f32, e1: f32, e2: f32, e3: f32, e4: f32, e5: f32, e6: f32, e7: f32) F32x8 {
+    return F32x8{ e0, e1, e2, e3, e4, e5, e6, e7 };
+}
+// zig fmt: off
+/// Packs sixteen scalars into an `F32x16` in argument order (`e0` is lane 0, `ef` is lane 15).
+pub inline fn f32x16(
+    e0: f32, e1: f32, e2: f32, e3: f32, e4: f32, e5: f32, e6: f32, e7: f32,
+    e8: f32, e9: f32, ea: f32, eb: f32, ec: f32, ed: f32, ee: f32, ef: f32) F32x16 {
+    return F32x16{
+        e0, e1, e2, e3, e4, e5, e6, e7,
+        e8, e9, ea, eb, ec, ed, ee, ef,
+    };
+}
+// zig fmt: on
+
+/// Returns an `F32x4` with every lane set to `e0`.
+pub inline fn f32x4s(e0: f32) F32x4 {
+    return @splat(veclen(F32x4), e0);
+}
+/// Returns an `F32x8` with every lane set to `e0`.
+pub inline fn f32x8s(e0: f32) F32x8 {
+    return @splat(veclen(F32x8), e0);
+}
+/// Returns an `F32x16` with every lane set to `e0`.
+pub inline fn f32x16s(e0: f32) F32x16 {
+    return @splat(veclen(F32x16), e0);
+}
+
+/// Packs four booleans into a `Boolx4`; `e0` lands in lane 0 and `e3` in lane 3.
+pub inline fn boolx4(e0: bool, e1: bool, e2: bool, e3: bool) Boolx4 {
+    return Boolx4{ e0, e1, e2, e3 };
+}
+/// Packs eight booleans into a `Boolx8` in argument order (`e0` is lane 0, `e7` is lane 7).
+pub inline fn boolx8(e0: bool, e1: bool, e2: bool, e3: bool, e4: bool, e5: bool, e6: bool, e7: bool) Boolx8 {
+    return Boolx8{ e0, e1, e2, e3, e4, e5, e6, e7 };
+}
+// zig fmt: off
+/// Packs sixteen booleans into a `Boolx16` in argument order (`e0` is lane 0, `ef` is lane 15).
+pub inline fn boolx16(
+    e0: bool, e1: bool, e2: bool, e3: bool, e4: bool, e5: bool, e6: bool, e7: bool,
+    e8: bool, e9: bool, ea: bool, eb: bool, ec: bool, ed: bool, ee: bool, ef: bool) Boolx16 {
+    return Boolx16{
+        e0, e1, e2, e3, e4, e5, e6, e7,
+        e8, e9, ea, eb, ec, ed, ee, ef,
+    };
+}
+// zig fmt: on
+
+/// Returns the lane count of the vector type `T` as a compile-time integer.
+pub inline fn veclen(comptime T: type) comptime_int {
+    const info = @typeInfo(T);
+    return info.Vector.len;
+}
+
+/// Broadcasts `value` to every lane of the vector type `T`.
+pub inline fn splat(comptime T: type, value: f32) T {
+    const lane_count = veclen(T);
+    return @splat(lane_count, value);
+}
+/// Broadcasts the raw 32-bit pattern `value`, reinterpreted as an `f32`, to every lane of `T`.
+pub inline fn splatInt(comptime T: type, value: u32) T {
+    const as_float = @bitCast(f32, value);
+    return @splat(veclen(T), as_float);
+}
+
+/// Loads the first `len` elements of `mem` into the low lanes of a vector of type `T`;
+/// the remaining lanes are zero. `len == 0` means "load all `veclen(T)` lanes".
+/// A `len` larger than the vector is rejected at compile time.
+pub fn load(mem: []const f32, comptime T: type, comptime len: u32) T {
+    if (len > veclen(T)) {
+        @compileError("zmath.load(): 'len' is greater than vector len of type " ++ @typeName(T));
+    }
+    // Never reassigned, so a plain comptime-known constant is enough.
+    const loop_len = if (len == 0) veclen(T) else len;
+    var v = splat(T, 0.0);
+    comptime var i: u32 = 0;
+    inline while (i < loop_len) : (i += 1) {
+        v[i] = mem[i];
+    }
+    return v;
+}
+test "zmath.load" {
+    const data = [7]f32{ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0 };
+    var data_ptr = &data;
+    var offset: u32 = 0;
+    // Open-ended slice of the array, starting at a runtime offset.
+    const from_open = load(data[offset..], F32x4, 2);
+    try expect(approxEqAbs(from_open, F32x4{ 1.0, 2.0, 0.0, 0.0 }, 0.0));
+    offset += 2;
+    // Two-element window with runtime bounds.
+    const from_window = load(data[offset .. offset + 2], F32x4, 2);
+    try expect(approxEqAbs(from_window, F32x4{ 3.0, 4.0, 0.0, 0.0 }, 0.0));
+    // Window with constant bounds covering the tail of the array.
+    const from_tail = load(data[5..7], F32x4, 2);
+    try expect(approxEqAbs(from_tail, F32x4{ 6.0, 7.0, 0.0, 0.0 }, 0.0));
+    // Same checks, slicing through a pointer to the array instead.
+    const via_ptr_open = load(data_ptr[1..], F32x4, 2);
+    try expect(approxEqAbs(via_ptr_open, F32x4{ 2.0, 3.0, 0.0, 0.0 }, 0.0));
+    offset += 1;
+    const via_ptr_window = load(data_ptr[offset .. offset + 2], F32x4, 2);
+    try expect(approxEqAbs(via_ptr_window, F32x4{ 4.0, 5.0, 0.0, 0.0 }, 0.0));
+}
+
+/// Writes the first `len` lanes of the vector `v` to `mem[0..len]`.
+/// `len == 0` means "store all lanes of `v`". A `len` larger than the vector
+/// is rejected at compile time.
+pub fn store(mem: []f32, v: anytype, comptime len: u32) void {
+    const T = @TypeOf(v);
+    if (len > veclen(T)) {
+        @compileError("zmath.store(): 'len' is greater than vector len of type " ++ @typeName(T));
+    }
+    // Never reassigned, so a plain comptime-known constant is enough.
+    const loop_len = if (len == 0) veclen(T) else len;
+    comptime var i: u32 = 0;
+    inline while (i < loop_len) : (i += 1) {
+        mem[i] = v[i];
+    }
+}
+test "zmath.store" {
+    var buffer = [7]f32{ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0 };
+    // Three lanes come from buffer[1..4]; the fourth lane is zero-filled by load().
+    const loaded = load(buffer[1..], F32x4, 3);
+    // Writing all four lanes at buffer[2..] overwrites buffer[2..6].
+    store(buffer[2..], loaded, 4);
+    const expected_prefix = [6]f32{ 1.0, 2.0, 2.0, 3.0, 4.0, 0.0 };
+    for (expected_prefix) |want, idx| {
+        try expect(buffer[idx] == want);
+    }
+}
+
+/// Builds an `F32x4` from a 2-element array; z and w are set to 0.
+pub inline fn loadArr2(arr: [2]f32) F32x4 {
+    return .{ arr[0], arr[1], 0.0, 0.0 };
+}
+/// Builds an `F32x4` from a 2-element array plus explicit `z` and `w` components.
+pub inline fn loadArr2zw(arr: [2]f32, z: f32, w: f32) F32x4 {
+    return .{ arr[0], arr[1], z, w };
+}
+/// Builds an `F32x4` from a 3-element array; w is set to 0.
+pub inline fn loadArr3(arr: [3]f32) F32x4 {
+    return .{ arr[0], arr[1], arr[2], 0.0 };
+}
+/// Builds an `F32x4` from a 3-element array plus an explicit `w` component.
+pub inline fn loadArr3w(arr: [3]f32, w: f32) F32x4 {
+    return .{ arr[0], arr[1], arr[2], w };
+}
+/// Builds an `F32x4` from a 4-element array, element `i` going to lane `i`.
+pub inline fn loadArr4(arr: [4]f32) F32x4 {
+    return .{ arr[0], arr[1], arr[2], arr[3] };
+}
+
+/// Overwrites `arr` with the first two lanes of `v`.
+pub inline fn storeArr2(arr: *[2]f32, v: F32x4) void {
+    arr.* = [2]f32{ v[0], v[1] };
+}
+/// Overwrites `arr` with the first three lanes of `v`.
+pub inline fn storeArr3(arr: *[3]f32, v: F32x4) void {
+    arr.* = [3]f32{ v[0], v[1], v[2] };
+}
+/// Overwrites `arr` with all four lanes of `v`.
+pub inline fn storeArr4(arr: *[4]f32, v: F32x4) void {
+    arr.* = [4]f32{ v[0], v[1], v[2], v[3] };
+}
+
+/// Reinterprets a pointer to an `F32x4` as a `*const [3]f32` at the same address.
+/// Passing anything other than a pointer to `F32x4` fails a compile-time assertion.
+pub inline fn arr3Ptr(ptr: anytype) *const [3]f32 {
+    const PtrT = @TypeOf(ptr);
+    comptime assert(@typeInfo(PtrT) == .Pointer);
+    comptime assert(std.meta.Child(PtrT) == F32x4);
+    return @ptrCast(*const [3]f32, ptr);
+}
+
+/// Reinterprets a pointer to a `Mat`, `F32x4`, `F32x8` or `F32x16` as a many-item
+/// pointer to `f32`. Any other pointee type fails a compile-time assertion.
+pub inline fn arrNPtr(ptr: anytype) [*]const f32 {
+    const PtrT = @TypeOf(ptr);
+    comptime assert(@typeInfo(PtrT) == .Pointer);
+    const supported = switch (std.meta.Child(PtrT)) {
+        Mat, F32x4, F32x8, F32x16 => true,
+        else => false,
+    };
+    comptime assert(supported);
+    return @ptrCast([*]const f32, ptr);
+}
+test "zmath.arrNPtr" {
+    {
+        // The identity matrix viewed as 16 floats: the diagonal sits at indices 0, 5, 10 and 15.
+        const ident = identity();
+        const floats = arrNPtr(&ident);
+        const diagonal = [4]usize{ 0, 5, 10, 15 };
+        for (diagonal) |idx| {
+            try expect(floats[idx] == 1.0);
+        }
+    }
+    {
+        // An 8-wide vector of ones viewed as floats.
+        const ones = f32x8s(1.0);
+        const floats = arrNPtr(&ones);
+        try expect(floats[1] == 1.0);
+        try expect(floats[7] == 1.0);
+    }
+}
+
+test "zmath.loadArr" {
+    const camera_position = [3]f32{ 1.0, 2.0, 3.0 };
+    {
+        // loadArr3 leaves w at zero.
+        const without_w = loadArr3(camera_position);
+        try expect(approxEqAbs(without_w, f32x4(1.0, 2.0, 3.0, 0.0), 0.0));
+    }
+    {
+        // loadArr3w takes w from its second argument.
+        const with_w = loadArr3w(camera_position, 1.0);
+        try expect(approxEqAbs(with_w, f32x4(1.0, 2.0, 3.0, 1.0), 0.0));
+    }
+}
+
+/// Returns the first two lanes of `v` as an array.
+pub inline fn vecToArr2(v: Vec) [2]f32 {
+    return [2]f32{ v[0], v[1] };
+}
+/// Returns the first three lanes of `v` as an array.
+pub inline fn vecToArr3(v: Vec) [3]f32 {
+    return [3]f32{ v[0], v[1], v[2] };
+}
+/// Returns all four lanes of `v` as an array.
+pub inline fn vecToArr4(v: Vec) [4]f32 {
+    return [4]f32{ v[0], v[1], v[2], v[3] };
+}
+// ------------------------------------------------------------------------------
+//
+// 2. Functions that work on all vector components (F32xN = F32x4 or F32x8 or F32x16)
+//
+// ------------------------------------------------------------------------------
+/// Returns `true` when the first `len` lanes of the bool vector `vb` are all `true`.
+/// `len == 0` checks every lane; a `len` larger than the vector is a compile error.
+pub fn all(vb: anytype, comptime len: u32) bool {
+    const T = @TypeOf(vb);
+    if (len > veclen(T)) {
+        @compileError("zmath.all(): 'len' is greater than vector len of type " ++ @typeName(T));
+    }
+    // Never reassigned, so a plain comptime-known constant is enough.
+    const loop_len = if (len == 0) veclen(T) else len;
+    const ab: [veclen(T)]bool = vb;
+    comptime var i: u32 = 0;
+    var result = true;
+    inline while (i < loop_len) : (i += 1) {
+        result = result and ab[i];
+    }
+    return result;
+}
+test "zmath.all" {
+    // A non-zero length restricts the check to the leading lanes.
+    try expect(all(boolx8(true, true, true, true, true, false, true, false), 5));
+    try expect(!all(boolx8(true, true, true, true, true, false, true, false), 6));
+    try expect(all(boolx8(true, true, true, true, false, false, false, false), 4));
+    try expect(all(boolx4(true, true, true, false), 3));
+    try expect(all(boolx4(true, true, true, false), 1));
+    try expect(all(boolx4(true, false, false, false), 1));
+    try expect(!all(boolx4(false, true, false, false), 1));
+    // Length 0 checks every lane.
+    try expect(!all(boolx8(true, true, true, true, true, false, true, false), 0));
+    try expect(!all(boolx4(false, true, false, false), 0));
+    try expect(all(boolx4(true, true, true, true), 0));
+}
+
+/// Returns `true` when at least one of the first `len` lanes of the bool vector `vb` is `true`.
+/// `len == 0` checks every lane; a `len` larger than the vector is a compile error.
+pub fn any(vb: anytype, comptime len: u32) bool {
+    const T = @TypeOf(vb);
+    if (len > veclen(T)) {
+        @compileError("zmath.any(): 'len' is greater than vector len of type " ++ @typeName(T));
+    }
+    // Never reassigned, so a plain comptime-known constant is enough.
+    const loop_len = if (len == 0) veclen(T) else len;
+    const ab: [veclen(T)]bool = vb;
+    comptime var i: u32 = 0;
+    var result = false;
+    inline while (i < loop_len) : (i += 1) {
+        result = result or ab[i];
+    }
+    return result;
+}
+test "zmath.any" {
+    // Length 0 inspects every lane.
+    try expect(any(boolx8(true, true, true, true, true, false, true, false), 0));
+    // `true` lanes beyond the requested prefix are ignored.
+    try expect(!any(boolx8(false, false, false, true, true, false, true, false), 3));
+    try expect(!any(boolx8(false, false, false, false, false, true, false, false), 4));
+}
+
+/// Per-lane test of whether `v0` and `v1` differ by no more than `epsilon`.
+/// The magnitude of the difference is taken as `maxFast(d, 0 - d)` with `d = v0 - v1`.
+pub inline fn isNearEqual(
+    v0: anytype,
+    v1: anytype,
+    epsilon: anytype,
+) @Vector(veclen(@TypeOf(v0)), bool) {
+    const VecT = @TypeOf(v0, v1, epsilon);
+    const diff = v0 - v1;
+    const neg_diff = splat(VecT, 0.0) - diff;
+    const abs_diff = maxFast(diff, neg_diff);
+    return abs_diff <= epsilon;
+}
+test "zmath.isNearEqual" {
+    if (builtin.target.os.tag == .macos and builtin.zig_backend != .stage1) return error.SkipZigTest;
+    {
+        const lhs = f32x4(1.0, 2.0, -3.0, 4.001);
+        const rhs = f32x4(1.0, 2.1, 3.0, 4.0);
+        const got = isNearEqual(lhs, rhs, splat(F32x4, 0.01));
+        try expect(@reduce(.And, got == boolx4(true, false, false, true)));
+    }
+    {
+        const lhs = f32x8(1.0, 2.0, -3.0, 4.001, 1.001, 2.3, -0.0, 0.0);
+        const rhs = f32x8(1.0, 2.1, 3.0, 4.0, -1.001, 2.1, 0.0, 0.0);
+        const got = isNearEqual(lhs, rhs, splat(F32x8, 0.01));
+        try expect(@reduce(.And, got == boolx8(true, false, false, true, false, false, true, true)));
+    }
+    // Infinite and NaN inputs are expected never to compare as near-equal.
+    const tolerance = splat(F32x4, 0.0001);
+    try expect(!all(isNearEqual(
+        splat(F32x4, math.inf_f32),
+        splat(F32x4, math.inf_f32),
+        tolerance,
+    ), 0));
+    try expect(!all(isNearEqual(
+        splat(F32x4, -math.inf_f32),
+        splat(F32x4, math.inf_f32),
+        tolerance,
+    ), 0));
+    try expect(!all(isNearEqual(
+        splat(F32x4, -math.inf_f32),
+        splat(F32x4, -math.inf_f32),
+        tolerance,
+    ), 0));
+    try expect(!all(isNearEqual(
+        splat(F32x4, -math.nan_f32),
+        splat(F32x4, math.inf_f32),
+        tolerance,
+    ), 0));
+}
+
+/// Per-lane NaN test: a lane is `true` exactly when it does not compare equal to itself.
+pub inline fn isNan(
+    v: anytype,
+) @Vector(veclen(@TypeOf(v)), bool) {
+    const differs_from_itself = v != v;
+    return differs_from_itself;
+}
+test "zmath.isNan" {
+    {
+        // Infinity and ordinary numbers are not NaN.
+        const input = f32x4(math.inf_f32, math.nan_f32, math.nan_f32, 7.0);
+        const got = isNan(input);
+        try expect(@reduce(.And, got == boolx4(false, true, true, false)));
+    }
+    {
+        const input = f32x8(0, math.nan_f32, 0, 0, math.inf_f32, math.nan_f32, math.qnan_f32, 7.0);
+        const got = isNan(input);
+        try expect(@reduce(.And, got == boolx8(false, true, false, false, false, true, true, false)));
+    }
+}
+
+/// Per-lane infinity test: a lane is `true` when `abs(v)` equals positive infinity.
+pub inline fn isInf(
+    v: anytype,
+) @Vector(veclen(@TypeOf(v)), bool) {
+    const VecT = @TypeOf(v);
+    const magnitude = abs(v);
+    return magnitude == splat(VecT, math.inf_f32);
+}
+test "zmath.isInf" {
+    {
+        // NaN lanes must not be reported as infinite.
+        const input = f32x4(math.inf_f32, math.nan_f32, math.qnan_f32, 7.0);
+        const got = isInf(input);
+        try expect(@reduce(.And, got == boolx4(true, false, false, false)));
+    }
+    {
+        const input = f32x8(0, math.inf_f32, 0, 0, math.inf_f32, math.nan_f32, math.qnan_f32, 7.0);
+        const got = isInf(input);
+        try expect(@reduce(.And, got == boolx8(false, true, false, false, true, false, false, false)));
+    }
+}
+
+/// Per-lane test of whether `v` lies within `[-bounds, bounds]` (both ends inclusive).
+pub inline fn isInBounds(
+    v: anytype,
+    bounds: anytype,
+) @Vector(veclen(@TypeOf(v)), bool) {
+    const VecT = @TypeOf(v, bounds);
+    const BitT = @Vector(veclen(VecT), u1);
+    const BoolT = @Vector(veclen(VecT), bool);
+
+    // 2 x cmpleps, xorps, load, andps
+    const below_upper = v <= bounds;
+    const above_lower = (bounds * splat(VecT, -1.0)) <= v;
+    // Bool vectors are combined as 1-bit integers, then cast back to bools.
+    const both = @bitCast(BitT, below_upper) & @bitCast(BitT, above_lower);
+    return @bitCast(BoolT, both);
+}
+test "zmath.isInBounds" {
+    {
+        const limits = f32x4(1.0, 2.0, 1.0, 2.0);
+        // Every lane inside the limits; values exactly on a limit count as inside.
+        const inside = isInBounds(f32x4(0.5, -2.0, -1.0, 1.9), limits);
+        try expect(@reduce(.And, inside == boolx4(true, true, true, true)));
+        // First two lanes fall below the negated limit.
+        const partly_outside = isInBounds(f32x4(-1.6, -2.001, -1.0, 1.9), limits);
+        try expect(@reduce(.And, partly_outside == boolx4(false, false, true, true)));
+    }
+    {
+        // An infinite limit accepts the lane; a NaN limit rejects it.
+        const input = f32x8(2.0, 1.0, 2.0, 1.0, 0.5, -2.0, -1.0, 1.9);
+        const limits = f32x8(1.0, 1.0, 1.0, math.inf_f32, 1.0, math.nan_f32, 1.0, 2.0);
+        const got = isInBounds(input, limits);
+        try expect(@reduce(.And, got == boolx8(false, true, false, true, true, false, true, true)));
+    }
+}
+
+/// Bitwise AND of the raw 32-bit lane patterns of `v0` and `v1`, returned as floats.
+pub inline fn andInt(v0: anytype, v1: anytype) @TypeOf(v0, v1) {
+    const VecT = @TypeOf(v0, v1);
+    const BitsT = @Vector(veclen(VecT), u32);
+    const combined = @bitCast(BitsT, v0) & @bitCast(BitsT, v1); // andps
+    return @bitCast(VecT, combined);
+}
+test "zmath.andInt" {
+    // A lane with every bit set keeps the other operand; a zero lane clears it.
+    const all_ones = @bitCast(f32, ~@as(u32, 0));
+    {
+        const lane_mask = f32x4(0, all_ones, 0, all_ones);
+        const values = f32x4(1.0, 2.0, 3.0, math.inf_f32);
+        const got = andInt(lane_mask, values);
+        try expect(got[3] == math.inf_f32);
+        try expect(approxEqAbs(got, f32x4(0.0, 2.0, 0.0, math.inf_f32), 0.0));
+    }
+    {
+        const lane_mask = f32x8(0, 0, 0, 0, 0, all_ones, 0, all_ones);
+        const values = f32x8(0, 0, 0, 0, 1.0, 2.0, 3.0, math.inf_f32);
+        const got = andInt(lane_mask, values);
+        try expect(got[7] == math.inf_f32);
+        try expect(approxEqAbs(got, f32x8(0, 0, 0, 0, 0.0, 2.0, 0.0, math.inf_f32), 0.0));
+    }
+}
+
+/// Bitwise `(~v0) & v1` on the raw 32-bit lane patterns, returned as floats.
+/// Note that it is the FIRST operand that gets inverted.
+pub inline fn andNotInt(v0: anytype, v1: anytype) @TypeOf(v0, v1) {
+    const VecT = @TypeOf(v0, v1);
+    const BitsT = @Vector(veclen(VecT), u32);
+    const inverted_first = ~@bitCast(BitsT, v0);
+    const combined = inverted_first & @bitCast(BitsT, v1); // andnps
+    return @bitCast(VecT, combined);
+}
+test "zmath.andNotInt" {
+    // Lanes where the (inverted) first operand has every bit set come out as zero.
+    const all_ones = @bitCast(f32, ~@as(u32, 0));
+    {
+        const values = f32x4(1.0, 2.0, 3.0, 4.0);
+        const lane_mask = f32x4(0, all_ones, 0, all_ones);
+        const got = andNotInt(lane_mask, values);
+        try expect(approxEqAbs(got, f32x4(1.0, 0.0, 3.0, 0.0), 0.0));
+    }
+    {
+        const values = f32x8(0, 0, 0, 0, 1.0, 2.0, 3.0, 4.0);
+        const lane_mask = f32x8(0, 0, 0, 0, 0, all_ones, 0, all_ones);
+        const got = andNotInt(lane_mask, values);
+        try expect(approxEqAbs(got, f32x8(0, 0, 0, 0, 1.0, 0.0, 3.0, 0.0), 0.0));
+    }
+}
+
+/// Bitwise OR of the raw 32-bit lane patterns of `v0` and `v1`, returned as floats.
+pub inline fn orInt(v0: anytype, v1: anytype) @TypeOf(v0, v1) {
+    const VecT = @TypeOf(v0, v1);
+    const BitsT = @Vector(veclen(VecT), u32);
+    const combined = @bitCast(BitsT, v0) | @bitCast(BitsT, v1); // orps
+    return @bitCast(VecT, combined);
+}
+test "zmath.orInt" {
+    // OR with zero keeps the value; OR with all-ones yields all-ones.
+    const ones_bits = ~@as(u32, 0);
+    const all_ones = @bitCast(f32, ones_bits);
+    {
+        const lane_mask = f32x4(0, all_ones, 0, 0);
+        const values = f32x4(1.0, 2.0, 3.0, 4.0);
+        const got = orInt(lane_mask, values);
+        try expect(got[0] == 1.0);
+        try expect(@bitCast(u32, got[1]) == ones_bits);
+        try expect(got[2] == 3.0);
+        try expect(got[3] == 4.0);
+    }
+    {
+        const lane_mask = f32x8(0, 0, 0, 0, 0, all_ones, 0, 0);
+        const values = f32x8(0, 0, 0, 0, 1.0, 2.0, 3.0, 4.0);
+        const got = orInt(lane_mask, values);
+        try expect(got[4] == 1.0);
+        try expect(@bitCast(u32, got[5]) == ones_bits);
+        try expect(got[6] == 3.0);
+        try expect(got[7] == 4.0);
+    }
+}
+
+/// Bitwise NOR, `~(v0 | v1)`, of the raw 32-bit lane patterns, returned as floats.
+pub inline fn norInt(v0: anytype, v1: anytype) @TypeOf(v0, v1) {
+    const VecT = @TypeOf(v0, v1);
+    const BitsT = @Vector(veclen(VecT), u32);
+    const either = @bitCast(BitsT, v0) | @bitCast(BitsT, v1);
+    return @bitCast(VecT, ~either); // por, pcmpeqd, pxor
+}
+
+/// Bitwise XOR of the raw 32-bit lane patterns of `v0` and `v1`, returned as floats.
+pub inline fn xorInt(v0: anytype, v1: anytype) @TypeOf(v0, v1) {
+    const VecT = @TypeOf(v0, v1);
+    const BitsT = @Vector(veclen(VecT), u32);
+    const combined = @bitCast(BitsT, v0) ^ @bitCast(BitsT, v1); // xorps
+    return @bitCast(VecT, combined);
+}
+test "zmath.xorInt" {
+    // XOR of identical patterns is zero; XOR with zero keeps the pattern.
+    const ones_bits = ~@as(u32, 0);
+    const all_ones = @bitCast(f32, ones_bits);
+    {
+        const lhs = f32x4(1.0, all_ones, 0, 0);
+        const rhs = f32x4(1.0, 0, 0, 0);
+        const got = xorInt(lhs, rhs);
+        try expect(got[0] == 0.0);
+        try expect(@bitCast(u32, got[1]) == ones_bits);
+        try expect(got[2] == 0.0);
+        try expect(got[3] == 0.0);
+    }
+    {
+        const lhs = f32x8(0, 0, 0, 0, 1.0, all_ones, 0, 0);
+        const rhs = f32x8(0, 0, 0, 0, 1.0, 0, 0, 0);
+        const got = xorInt(lhs, rhs);
+        try expect(got[4] == 0.0);
+        try expect(@bitCast(u32, got[5]) == ones_bits);
+        try expect(got[6] == 0.0);
+        try expect(got[7] == 0.0);
+    }
+}
+
+/// Lane-wise minimum built from a single compare-and-select on `v0 < v1`.
+/// The accompanying test expects a lane with NaN in `v0` to yield the `v1` lane.
+pub inline fn minFast(v0: anytype, v1: anytype) @TypeOf(v0, v1) {
+    const first_is_less = v0 < v1;
+    return select(first_is_less, v0, v1); // minps
+}
+test "zmath.minFast" {
+    const rhs = f32x4(2.0, 1.0, 4.0, math.inf_f32);
+    {
+        const lhs = f32x4(1.0, 3.0, 2.0, 7.0);
+        const got = minFast(lhs, rhs);
+        try expect(approxEqAbs(got, f32x4(1.0, 1.0, 2.0, 7.0), 0.0));
+    }
+    {
+        // NaN lanes in the first operand fall through to the second operand.
+        const lhs = f32x4(1.0, math.nan_f32, 5.0, math.qnan_f32);
+        const got = minFast(lhs, rhs);
+        try expect(got[0] == 1.0);
+        try expect(got[1] == 1.0);
+        try expect(!math.isNan(got[1]));
+        try expect(got[2] == 4.0);
+        try expect(got[3] == math.inf_f32);
+        try expect(!math.isNan(got[3]));
+    }
+}
+
+/// Lane-wise maximum built from a single compare-and-select on `v0 > v1`.
+/// The accompanying test expects a lane with NaN in `v0` to yield the `v1` lane.
+pub inline fn maxFast(v0: anytype, v1: anytype) @TypeOf(v0, v1) {
+    const first_is_greater = v0 > v1;
+    return select(first_is_greater, v0, v1); // maxps
+}
+test "zmath.maxFast" {
+    const rhs = f32x4(2.0, 1.0, 4.0, math.inf_f32);
+    {
+        const lhs = f32x4(1.0, 3.0, 2.0, 7.0);
+        const got = maxFast(lhs, rhs);
+        try expect(approxEqAbs(got, f32x4(2.0, 3.0, 4.0, math.inf_f32), 0.0));
+    }
+    {
+        // NaN lanes in the first operand fall through to the second operand.
+        const lhs = f32x4(1.0, math.nan_f32, 5.0, math.qnan_f32);
+        const got = maxFast(lhs, rhs);
+        try expect(got[0] == 2.0);
+        try expect(got[1] == 1.0);
+        try expect(got[2] == 5.0);
+        try expect(got[3] == math.inf_f32);
+        try expect(!math.isNan(got[3]));
+    }
+}
+
+/// Lane-wise minimum via the `@min` builtin. Unlike `minFast`, this is the variant
+/// intended to cope with inf and NaN operands (see the accompanying test for the
+/// exact expectations).
+pub inline fn min(v0: anytype, v1: anytype) @TypeOf(v0, v1) {
+    const lesser = @min(v0, v1); // minps, cmpunordps, andps, andnps, orps
+    return lesser;
+}
+test "zmath.min" {
+    if (builtin.target.os.tag == .macos) return error.SkipZigTest;
+    {
+        const lhs = f32x4(1.0, 3.0, 2.0, 7.0);
+        const rhs = f32x4(2.0, 1.0, 4.0, math.inf_f32);
+        const got = min(lhs, rhs);
+        try expect(approxEqAbs(got, f32x4(1.0, 1.0, 2.0, 7.0), 0.0));
+    }
+    {
+        const lhs = f32x8(0, 0, -2.0, 0, 1.0, 3.0, 2.0, 7.0);
+        const rhs = f32x8(0, 1.0, 0, 0, 2.0, 1.0, 4.0, math.inf_f32);
+        const got = min(lhs, rhs);
+        try expect(approxEqAbs(got, f32x8(0.0, 0.0, -2.0, 0.0, 1.0, 1.0, 2.0, 7.0), 0.0));
+    }
+    {
+        // A NaN lane in one operand yields the other operand's lane.
+        const lhs = f32x4(1.0, math.nan_f32, 5.0, math.qnan_f32);
+        const rhs = f32x4(2.0, 1.0, 4.0, math.inf_f32);
+        const got = min(lhs, rhs);
+        try expect(got[0] == 1.0);
+        try expect(got[1] == 1.0);
+        try expect(!math.isNan(got[1]));
+        try expect(got[2] == 4.0);
+        try expect(got[3] == math.inf_f32);
+        try expect(!math.isNan(got[3]));
+    }
+    {
+        // Infinities against NaN; only the lane where both operands are NaN stays NaN.
+        const lhs = f32x4(-math.inf_f32, math.inf_f32, math.inf_f32, math.qnan_f32);
+        const rhs = f32x4(math.qnan_f32, -math.inf_f32, math.qnan_f32, math.nan_f32);
+        const got = min(lhs, rhs);
+        try expect(got[0] == -math.inf_f32);
+        try expect(got[1] == -math.inf_f32);
+        try expect(got[2] == math.inf_f32);
+        try expect(!math.isNan(got[2]));
+        try expect(math.isNan(got[3]));
+        try expect(!math.isInf(got[3]));
+    }
+}
+
+/// Lane-wise maximum via the `@max` builtin. Unlike `maxFast`, this is the variant
+/// intended to cope with inf and NaN operands (see the accompanying test for the
+/// exact expectations).
+pub inline fn max(v0: anytype, v1: anytype) @TypeOf(v0, v1) {
+    const greater = @max(v0, v1); // maxps, cmpunordps, andps, andnps, orps
+    return greater;
+}
+test "zmath.max" {
+    if (builtin.target.os.tag == .macos) return error.SkipZigTest;
+    {
+        const lhs = f32x4(1.0, 3.0, 2.0, 7.0);
+        const rhs = f32x4(2.0, 1.0, 4.0, math.inf_f32);
+        const got = max(lhs, rhs);
+        try expect(approxEqAbs(got, f32x4(2.0, 3.0, 4.0, math.inf_f32), 0.0));
+    }
+    {
+        const lhs = f32x8(0, 0, -2.0, 0, 1.0, 3.0, 2.0, 7.0);
+        const rhs = f32x8(0, 1.0, 0, 0, 2.0, 1.0, 4.0, math.inf_f32);
+        const got = max(lhs, rhs);
+        try expect(approxEqAbs(got, f32x8(0.0, 1.0, 0.0, 0.0, 2.0, 3.0, 4.0, math.inf_f32), 0.0));
+    }
+    {
+        // A NaN lane in one operand yields the other operand's lane.
+        const lhs = f32x4(1.0, math.nan_f32, 5.0, math.qnan_f32);
+        const rhs = f32x4(2.0, 1.0, 4.0, math.inf_f32);
+        const got = max(lhs, rhs);
+        try expect(got[0] == 2.0);
+        try expect(got[1] == 1.0);
+        try expect(got[2] == 5.0);
+        try expect(got[3] == math.inf_f32);
+        try expect(!math.isNan(got[3]));
+    }
+    {
+        // Infinities against NaN; only the lane where both operands are NaN stays NaN.
+        const lhs = f32x4(-math.inf_f32, math.inf_f32, math.inf_f32, math.qnan_f32);
+        const rhs = f32x4(math.qnan_f32, -math.inf_f32, math.qnan_f32, math.nan_f32);
+        const got = max(lhs, rhs);
+        try expect(got[0] == -math.inf_f32);
+        try expect(got[1] == math.inf_f32);
+        try expect(got[2] == math.inf_f32);
+        try expect(!math.isNan(got[2]));
+        try expect(math.isNan(got[3]));
+        try expect(!math.isInf(got[3]));
+    }
+}
+
+/// Rounds every lane of `v` to an integral value.
+///
+/// * x86_64 with AVX: `F32x4`, `F32x8` and `F32x16` are handled with
+///   `vroundps` / `vrndscaleps` using immediate 0, which the Intel SDM defines
+///   as round-to-nearest with ties going to the even integer. Without
+///   AVX-512F an `F32x16` is processed as two `F32x8` halves. Any other type
+///   is rejected with a compile error.
+/// * Every other target: `splatNoFraction(T)` carrying the sign of `v` is
+///   added and then subtracted again. Lanes whose magnitude exceeds
+///   `splatNoFraction(T)` (and NaN lanes, for which the comparison is false)
+///   are returned unchanged.
+pub fn round(v: anytype) @TypeOf(v) {
+    const T = @TypeOf(v);
+    if (cpu_arch == .x86_64 and has_avx) {
+        if (T == F32x4) {
+            return asm ("vroundps $0, %%xmm0, %%xmm0"
+                : [ret] "={xmm0}" (-> T),
+                : [v] "{xmm0}" (v),
+            );
+        } else if (T == F32x8) {
+            return asm ("vroundps $0, %%ymm0, %%ymm0"
+                : [ret] "={ymm0}" (-> T),
+                : [v] "{ymm0}" (v),
+            );
+        } else if (T == F32x16 and has_avx512f) {
+            return asm ("vrndscaleps $0, %%zmm0, %%zmm0"
+                : [ret] "={zmm0}" (-> T),
+                : [v] "{zmm0}" (v),
+            );
+        } else if (T == F32x16 and !has_avx512f) {
+            // No 512-bit registers available: round each 256-bit half on its
+            // own and stitch the halves back together.
+            const arr: [16]f32 = v;
+            var ymm0 = @as(F32x8, arr[0..8].*);
+            var ymm1 = @as(F32x8, arr[8..16].*);
+            ymm0 = asm ("vroundps $0, %%ymm0, %%ymm0"
+                : [ret] "={ymm0}" (-> F32x8),
+                : [v] "{ymm0}" (ymm0),
+            );
+            ymm1 = asm ("vroundps $0, %%ymm1, %%ymm1"
+                : [ret] "={ymm1}" (-> F32x8),
+                : [v] "{ymm1}" (ymm1),
+            );
+            // Negative indices select lanes from the second operand.
+            return @shuffle(f32, ymm0, ymm1, [16]i32{ 0, 1, 2, 3, 4, 5, 6, 7, -1, -2, -3, -4, -5, -6, -7, -8 });
+        } else {
+            // Previously this case fell off the end of the function and
+            // surfaced as an unrelated "implicitly returns" error.
+            @compileError("zmath.round() not implemented for " ++ @typeName(T));
+        }
+    } else {
+        const sign = andInt(v, splatNegativeZero(T));
+        const magic = orInt(splatNoFraction(T), sign);
+        var r1 = v + magic;
+        r1 = r1 - magic;
+        const r2 = abs(v);
+        const mask = r2 <= splatNoFraction(T);
+        return select(mask, r1, v);
+    }
+}
+test "zmath.round" {
+    // Infinities pass through unchanged; every flavour of NaN stays NaN.
+    {
+        try expect(all(round(splat(F32x4, math.inf_f32)) == splat(F32x4, math.inf_f32), 0));
+        try expect(all(round(splat(F32x4, -math.inf_f32)) == splat(F32x4, -math.inf_f32), 0));
+        try expect(all(isNan(round(splat(F32x4, math.nan_f32))), 0));
+        try expect(all(isNan(round(splat(F32x4, -math.nan_f32))), 0));
+        try expect(all(isNan(round(splat(F32x4, math.qnan_f32))), 0));
+        try expect(all(isNan(round(splat(F32x4, -math.qnan_f32))), 0));
+    }
+    // 16-wide vector.
+    {
+        const v = round(f32x16(1.1, -1.1, -1.5, 1.5, 2.1, 2.8, 2.9, 4.1, 5.8, 6.1, 7.9, 8.9, 10.1, 11.2, 12.7, 13.1));
+        try expect(approxEqAbs(
+            v,
+            f32x16(1.0, -1.0, -2.0, 2.0, 2.0, 3.0, 3.0, 4.0, 6.0, 6.0, 8.0, 9.0, 10.0, 11.0, 13.0, 13.0),
+            0.0,
+        ));
+    }
+    var v = round(f32x4(1.1, -1.1, -1.5, 1.5));
+    try expect(approxEqAbs(v, f32x4(1.0, -1.0, -2.0, 2.0), 0.0));
+
+    // Large magnitudes and infinities are expected to come back unchanged.
+    const v1 = f32x4(-10_000_000.1, -math.inf_f32, 10_000_001.5, math.inf_f32);
+    v = round(v1);
+    try expect(v[3] == math.inf_f32);
+    try expect(approxEqAbs(v, f32x4(-10_000_000.1, -math.inf_f32, 10_000_001.5, math.inf_f32), 0.0));
+
+    // Check the rounded result `v`; the original asserted on the input `v2`,
+    // which made these checks independent of `round`.
+    const v2 = f32x4(-math.qnan_f32, math.qnan_f32, math.nan_f32, -math.inf_f32);
+    v = round(v2);
+    try expect(math.isNan(v[0]));
+    try expect(math.isNan(v[1]));
+    try expect(math.isNan(v[2]));
+    try expect(v[3] == -math.inf_f32);
+
+    const v3 = f32x4(1001.5, -201.499, -10000.99, -101.5);
+    v = round(v3);
+    try expect(approxEqAbs(v, f32x4(1002.0, -201.0, -10001.0, -102.0), 0.0));
+
+    const v4 = f32x4(-1_388_609.9, 1_388_609.5, 1_388_109.01, 2_388_609.5);
+    v = round(v4);
+    try expect(approxEqAbs(v, f32x4(-1_388_610.0, 1_388_610.0, 1_388_109.0, 2_388_610.0), 0.0));
+
+    // Sweep a range of values and compare against the `@round` builtin at
+    // all three vector widths.
+    var f: f32 = -100.0;
+    var i: u32 = 0;
+    while (i < 100) : (i += 1) {
+        const vr = round(splat(F32x4, f));
+        const fr = @round(splat(F32x4, f));
+        const vr8 = round(splat(F32x8, f));
+        const fr8 = @round(splat(F32x8, f));
+        const vr16 = round(splat(F32x16, f));
+        const fr16 = @round(splat(F32x16, f));
+        try expect(approxEqAbs(vr, fr, 0.0));
+        try expect(approxEqAbs(vr8, fr8, 0.0));
+        try expect(approxEqAbs(vr16, fr16, 0.0));
+        f += 0.12345 * @intToFloat(f32, i);
+    }
+}
+
+/// Truncates every lane of `v` toward zero.
+///
+/// * x86_64 with AVX: `F32x4`, `F32x8` and `F32x16` are handled with
+///   `vroundps` / `vrndscaleps` using immediate 3 (truncate, per the Intel
+///   SDM). Without AVX-512F an `F32x16` is processed as two `F32x8` halves.
+///   Any other type is rejected with a compile error.
+/// * Every other target: lanes with `abs(v) < splatNoFraction(T)` take the
+///   value of `floatToIntAndBack(v)`; all other lanes (including NaN, for
+///   which the comparison is false) are returned unchanged.
+pub fn trunc(v: anytype) @TypeOf(v) {
+    const T = @TypeOf(v);
+    if (cpu_arch == .x86_64 and has_avx) {
+        if (T == F32x4) {
+            return asm ("vroundps $3, %%xmm0, %%xmm0"
+                : [ret] "={xmm0}" (-> T),
+                : [v] "{xmm0}" (v),
+            );
+        } else if (T == F32x8) {
+            return asm ("vroundps $3, %%ymm0, %%ymm0"
+                : [ret] "={ymm0}" (-> T),
+                : [v] "{ymm0}" (v),
+            );
+        } else if (T == F32x16 and has_avx512f) {
+            return asm ("vrndscaleps $3, %%zmm0, %%zmm0"
+                : [ret] "={zmm0}" (-> T),
+                : [v] "{zmm0}" (v),
+            );
+        } else if (T == F32x16 and !has_avx512f) {
+            // No 512-bit registers available: process each 256-bit half on
+            // its own and stitch the halves back together.
+            const arr: [16]f32 = v;
+            var ymm0 = @as(F32x8, arr[0..8].*);
+            var ymm1 = @as(F32x8, arr[8..16].*);
+            ymm0 = asm ("vroundps $3, %%ymm0, %%ymm0"
+                : [ret] "={ymm0}" (-> F32x8),
+                : [v] "{ymm0}" (ymm0),
+            );
+            ymm1 = asm ("vroundps $3, %%ymm1, %%ymm1"
+                : [ret] "={ymm1}" (-> F32x8),
+                : [v] "{ymm1}" (ymm1),
+            );
+            // Negative indices select lanes from the second operand.
+            return @shuffle(f32, ymm0, ymm1, [16]i32{ 0, 1, 2, 3, 4, 5, 6, 7, -1, -2, -3, -4, -5, -6, -7, -8 });
+        } else {
+            // Previously this case fell off the end of the function and
+            // surfaced as an unrelated "implicitly returns" error.
+            @compileError("zmath.trunc() not implemented for " ++ @typeName(T));
+        }
+    } else {
+        const mask = abs(v) < splatNoFraction(T);
+        const result = floatToIntAndBack(v);
+        return select(mask, result, v);
+    }
+}
+test "zmath.trunc" {
+    // Infinities pass through unchanged; every flavour of NaN stays NaN.
+    {
+        try expect(all(trunc(splat(F32x4, math.inf_f32)) == splat(F32x4, math.inf_f32), 0));
+        try expect(all(trunc(splat(F32x4, -math.inf_f32)) == splat(F32x4, -math.inf_f32), 0));
+        try expect(all(isNan(trunc(splat(F32x4, math.nan_f32))), 0));
+        try expect(all(isNan(trunc(splat(F32x4, -math.nan_f32))), 0));
+        try expect(all(isNan(trunc(splat(F32x4, math.qnan_f32))), 0));
+        try expect(all(isNan(trunc(splat(F32x4, -math.qnan_f32))), 0));
+    }
+    // 16-wide vector. The result is never reassigned, hence `const`.
+    {
+        const v = trunc(f32x16(1.1, -1.1, -1.5, 1.5, 2.1, 2.8, 2.9, 4.1, 5.8, 6.1, 7.9, 8.9, 10.1, 11.2, 12.7, 13.1));
+        try expect(approxEqAbs(
+            v,
+            f32x16(1.0, -1.0, -1.0, 1.0, 2.0, 2.0, 2.0, 4.0, 5.0, 6.0, 7.0, 8.0, 10.0, 11.0, 12.0, 13.0),
+            0.0,
+        ));
+    }
+    var v = trunc(f32x4(1.1, -1.1, -1.5, 1.5));
+    try expect(approxEqAbs(v, f32x4(1.0, -1.0, -1.0, 1.0), 0.0));
+
+    // Large magnitudes and infinities are expected to come back unchanged.
+    v = trunc(f32x4(-10_000_002.1, -math.inf_f32, 10_000_001.5, math.inf_f32));
+    try expect(approxEqAbs(v, f32x4(-10_000_002.1, -math.inf_f32, 10_000_001.5, math.inf_f32), 0.0));
+
+    v = trunc(f32x4(-math.qnan_f32, math.qnan_f32, math.nan_f32, -math.inf_f32));
+    try expect(math.isNan(v[0]));
+    try expect(math.isNan(v[1]));
+    try expect(math.isNan(v[2]));
+    try expect(v[3] == -math.inf_f32);
+
+    v = trunc(f32x4(1000.5001, -201.499, -10000.99, 100.750001));
+    try expect(approxEqAbs(v, f32x4(1000.0, -201.0, -10000.0, 100.0), 0.0));
+
+    v = trunc(f32x4(-7_388_609.5, 7_388_609.1, 8_388_109.5, -8_388_509.5));
+    try expect(approxEqAbs(v, f32x4(-7_388_609.0, 7_388_609.0, 8_388_109.0, -8_388_509.0), 0.0));
+
+    // Sweep a range of values and compare against the `@trunc` builtin at
+    // all three vector widths.
+    var f: f32 = -100.0;
+    var i: u32 = 0;
+    while (i < 100) : (i += 1) {
+        const vr = trunc(splat(F32x4, f));
+        const fr = @trunc(splat(F32x4, f));
+        const vr8 = trunc(splat(F32x8, f));
+        const fr8 = @trunc(splat(F32x8, f));
+        const vr16 = trunc(splat(F32x16, f));
+        const fr16 = @trunc(splat(F32x16, f));
+        try expect(approxEqAbs(vr, fr, 0.0));
+        try expect(approxEqAbs(vr8, fr8, 0.0));
+        try expect(approxEqAbs(vr16, fr16, 0.0));
+        f += 0.12345 * @intToFloat(f32, i);
+    }
+}
+
+/// Rounds every lane of `v` toward negative infinity.
+///
+/// * x86_64 with AVX: `F32x4`, `F32x8` and `F32x16` are handled with
+///   `vroundps` / `vrndscaleps` using immediate 1 (round down, per the Intel
+///   SDM). Without AVX-512F an `F32x16` is processed as two `F32x8` halves.
+///   Any other type is rejected with a compile error.
+/// * Every other target: `floatToIntAndBack(v)` is taken as a first guess and
+///   1.0 is subtracted from lanes where that guess is larger than `v`. Lanes
+///   with `abs(v) >= splatNoFraction(T)` (and NaN lanes, for which the
+///   comparison is false) are returned unchanged.
+pub fn floor(v: anytype) @TypeOf(v) {
+    const T = @TypeOf(v);
+    if (cpu_arch == .x86_64 and has_avx) {
+        if (T == F32x4) {
+            return asm ("vroundps $1, %%xmm0, %%xmm0"
+                : [ret] "={xmm0}" (-> T),
+                : [v] "{xmm0}" (v),
+            );
+        } else if (T == F32x8) {
+            return asm ("vroundps $1, %%ymm0, %%ymm0"
+                : [ret] "={ymm0}" (-> T),
+                : [v] "{ymm0}" (v),
+            );
+        } else if (T == F32x16 and has_avx512f) {
+            return asm ("vrndscaleps $1, %%zmm0, %%zmm0"
+                : [ret] "={zmm0}" (-> T),
+                : [v] "{zmm0}" (v),
+            );
+        } else if (T == F32x16 and !has_avx512f) {
+            // No 512-bit registers available: process each 256-bit half on
+            // its own and stitch the halves back together.
+            const arr: [16]f32 = v;
+            var ymm0 = @as(F32x8, arr[0..8].*);
+            var ymm1 = @as(F32x8, arr[8..16].*);
+            ymm0 = asm ("vroundps $1, %%ymm0, %%ymm0"
+                : [ret] "={ymm0}" (-> F32x8),
+                : [v] "{ymm0}" (ymm0),
+            );
+            ymm1 = asm ("vroundps $1, %%ymm1, %%ymm1"
+                : [ret] "={ymm1}" (-> F32x8),
+                : [v] "{ymm1}" (ymm1),
+            );
+            // Negative indices select lanes from the second operand.
+            return @shuffle(f32, ymm0, ymm1, [16]i32{ 0, 1, 2, 3, 4, 5, 6, 7, -1, -2, -3, -4, -5, -6, -7, -8 });
+        } else {
+            // Previously this case fell off the end of the function and
+            // surfaced as an unrelated "implicitly returns" error.
+            @compileError("zmath.floor() not implemented for " ++ @typeName(T));
+        }
+    } else {
+        const mask = abs(v) < splatNoFraction(T);
+        var result = floatToIntAndBack(v);
+        // Where the first guess overshot `v`, step down by one.
+        const larger_mask = result > v;
+        const larger = select(larger_mask, splat(T, -1.0), splat(T, 0.0));
+        result = result + larger;
+        return select(mask, result, v);
+    }
+}
+test "zmath.floor" {
+    // Infinities pass through unchanged; every flavour of NaN stays NaN.
+    {
+        try expect(all(floor(splat(F32x4, math.inf_f32)) == splat(F32x4, math.inf_f32), 0));
+        try expect(all(floor(splat(F32x4, -math.inf_f32)) == splat(F32x4, -math.inf_f32), 0));
+        try expect(all(isNan(floor(splat(F32x4, math.nan_f32))), 0));
+        try expect(all(isNan(floor(splat(F32x4, -math.nan_f32))), 0));
+        try expect(all(isNan(floor(splat(F32x4, math.qnan_f32))), 0));
+        try expect(all(isNan(floor(splat(F32x4, -math.qnan_f32))), 0));
+    }
+    // 16-wide vector. The result is never reassigned, hence `const`.
+    {
+        const v = floor(f32x16(1.1, -1.1, -1.5, 1.5, 2.1, 2.8, 2.9, 4.1, 5.8, 6.1, 7.9, 8.9, 10.1, 11.2, 12.7, 13.1));
+        try expect(approxEqAbs(
+            v,
+            f32x16(1.0, -2.0, -2.0, 1.0, 2.0, 2.0, 2.0, 4.0, 5.0, 6.0, 7.0, 8.0, 10.0, 11.0, 12.0, 13.0),
+            0.0,
+        ));
+    }
+    var v = floor(f32x4(1.5, -1.5, -1.7, -2.1));
+    try expect(approxEqAbs(v, f32x4(1.0, -2.0, -2.0, -3.0), 0.0));
+
+    // Large magnitudes and infinities are expected to come back unchanged.
+    v = floor(f32x4(-10_000_002.1, -math.inf_f32, 10_000_001.5, math.inf_f32));
+    try expect(approxEqAbs(v, f32x4(-10_000_002.1, -math.inf_f32, 10_000_001.5, math.inf_f32), 0.0));
+
+    v = floor(f32x4(-math.qnan_f32, math.qnan_f32, math.nan_f32, -math.inf_f32));
+    try expect(math.isNan(v[0]));
+    try expect(math.isNan(v[1]));
+    try expect(math.isNan(v[2]));
+    try expect(v[3] == -math.inf_f32);
+
+    v = floor(f32x4(1000.5001, -201.499, -10000.99, 100.75001));
+    try expect(approxEqAbs(v, f32x4(1000.0, -202.0, -10001.0, 100.0), 0.0));
+
+    v = floor(f32x4(-7_388_609.5, 7_388_609.1, 8_388_109.5, -8_388_509.5));
+    try expect(approxEqAbs(v, f32x4(-7_388_610.0, 7_388_609.0, 8_388_109.0, -8_388_510.0), 0.0));
+
+    // Sweep a range of values and compare against the `@floor` builtin at
+    // all three vector widths.
+    var f: f32 = -100.0;
+    var i: u32 = 0;
+    while (i < 100) : (i += 1) {
+        const vr = floor(splat(F32x4, f));
+        const fr = @floor(splat(F32x4, f));
+        const vr8 = floor(splat(F32x8, f));
+        const fr8 = @floor(splat(F32x8, f));
+        const vr16 = floor(splat(F32x16, f));
+        const fr16 = @floor(splat(F32x16, f));
+        try expect(approxEqAbs(vr, fr, 0.0));
+        try expect(approxEqAbs(vr8, fr8, 0.0));
+        try expect(approxEqAbs(vr16, fr16, 0.0));
+        f += 0.12345 * @intToFloat(f32, i);
+    }
+}
+
+/// Rounds every lane of `v` toward positive infinity.
+///
+/// * x86_64 with AVX: `F32x4`, `F32x8` and `F32x16` are handled with
+///   `vroundps` / `vrndscaleps` using immediate 2 (round up, per the Intel
+///   SDM). Without AVX-512F an `F32x16` is processed as two `F32x8` halves.
+///   Any other type is rejected with a compile error.
+/// * Every other target: `floatToIntAndBack(v)` is taken as a first guess and
+///   1.0 is added to lanes where that guess is smaller than `v`. Lanes with
+///   `abs(v) >= splatNoFraction(T)` (and NaN lanes, for which the comparison
+///   is false) are returned unchanged.
+pub fn ceil(v: anytype) @TypeOf(v) {
+    const T = @TypeOf(v);
+    if (cpu_arch == .x86_64 and has_avx) {
+        if (T == F32x4) {
+            return asm ("vroundps $2, %%xmm0, %%xmm0"
+                : [ret] "={xmm0}" (-> T),
+                : [v] "{xmm0}" (v),
+            );
+        } else if (T == F32x8) {
+            return asm ("vroundps $2, %%ymm0, %%ymm0"
+                : [ret] "={ymm0}" (-> T),
+                : [v] "{ymm0}" (v),
+            );
+        } else if (T == F32x16 and has_avx512f) {
+            return asm ("vrndscaleps $2, %%zmm0, %%zmm0"
+                : [ret] "={zmm0}" (-> T),
+                : [v] "{zmm0}" (v),
+            );
+        } else if (T == F32x16 and !has_avx512f) {
+            // No 512-bit registers available: process each 256-bit half on
+            // its own and stitch the halves back together.
+            const arr: [16]f32 = v;
+            var ymm0 = @as(F32x8, arr[0..8].*);
+            var ymm1 = @as(F32x8, arr[8..16].*);
+            ymm0 = asm ("vroundps $2, %%ymm0, %%ymm0"
+                : [ret] "={ymm0}" (-> F32x8),
+                : [v] "{ymm0}" (ymm0),
+            );
+            ymm1 = asm ("vroundps $2, %%ymm1, %%ymm1"
+                : [ret] "={ymm1}" (-> F32x8),
+                : [v] "{ymm1}" (ymm1),
+            );
+            // Negative indices select lanes from the second operand.
+            return @shuffle(f32, ymm0, ymm1, [16]i32{ 0, 1, 2, 3, 4, 5, 6, 7, -1, -2, -3, -4, -5, -6, -7, -8 });
+        } else {
+            // Previously this case fell off the end of the function and
+            // surfaced as an unrelated "implicitly returns" error.
+            @compileError("zmath.ceil() not implemented for " ++ @typeName(T));
+        }
+    } else {
+        const mask = abs(v) < splatNoFraction(T);
+        var result = floatToIntAndBack(v);
+        // Where the first guess undershot `v`, step up by one (subtracting -1).
+        const smaller_mask = result < v;
+        const smaller = select(smaller_mask, splat(T, -1.0), splat(T, 0.0));
+        result = result - smaller;
+        return select(mask, result, v);
+    }
+}
+test "zmath.ceil" {
+    // Infinities pass through unchanged; every flavour of NaN stays NaN.
+    {
+        try expect(all(ceil(splat(F32x4, math.inf_f32)) == splat(F32x4, math.inf_f32), 0));
+        try expect(all(ceil(splat(F32x4, -math.inf_f32)) == splat(F32x4, -math.inf_f32), 0));
+        try expect(all(isNan(ceil(splat(F32x4, math.nan_f32))), 0));
+        try expect(all(isNan(ceil(splat(F32x4, -math.nan_f32))), 0));
+        try expect(all(isNan(ceil(splat(F32x4, math.qnan_f32))), 0));
+        try expect(all(isNan(ceil(splat(F32x4, -math.qnan_f32))), 0));
+    }
+    // 16-wide vector. The result is never reassigned, hence `const`.
+    {
+        const v = ceil(f32x16(1.1, -1.1, -1.5, 1.5, 2.1, 2.8, 2.9, 4.1, 5.8, 6.1, 7.9, 8.9, 10.1, 11.2, 12.7, 13.1));
+        try expect(approxEqAbs(
+            v,
+            f32x16(2.0, -1.0, -1.0, 2.0, 3.0, 3.0, 3.0, 5.0, 6.0, 7.0, 8.0, 9.0, 11.0, 12.0, 13.0, 14.0),
+            0.0,
+        ));
+    }
+    var v = ceil(f32x4(1.5, -1.5, -1.7, -2.1));
+    try expect(approxEqAbs(v, f32x4(2.0, -1.0, -1.0, -2.0), 0.0));
+
+    // Large magnitudes and infinities are expected to come back unchanged.
+    v = ceil(f32x4(-10_000_002.1, -math.inf_f32, 10_000_001.5, math.inf_f32));
+    try expect(approxEqAbs(v, f32x4(-10_000_002.1, -math.inf_f32, 10_000_001.5, math.inf_f32), 0.0));
+
+    v = ceil(f32x4(-math.qnan_f32, math.qnan_f32, math.nan_f32, -math.inf_f32));
+    try expect(math.isNan(v[0]));
+    try expect(math.isNan(v[1]));
+    try expect(math.isNan(v[2]));
+    try expect(v[3] == -math.inf_f32);
+
+    v = ceil(f32x4(1000.5001, -201.499, -10000.99, 100.75001));
+    try expect(approxEqAbs(v, f32x4(1001.0, -201.0, -10000.0, 101.0), 0.0));
+
+    v = ceil(f32x4(-1_388_609.5, 1_388_609.1, 1_388_109.9, -1_388_509.9));
+    try expect(approxEqAbs(v, f32x4(-1_388_609.0, 1_388_610.0, 1_388_110.0, -1_388_509.0), 0.0));
+
+    // Sweep a range of values and compare against the `@ceil` builtin at
+    // all three vector widths.
+    var f: f32 = -100.0;
+    var i: u32 = 0;
+    while (i < 100) : (i += 1) {
+        const vr = ceil(splat(F32x4, f));
+        const fr = @ceil(splat(F32x4, f));
+        const vr8 = ceil(splat(F32x8, f));
+        const fr8 = @ceil(splat(F32x8, f));
+        const vr16 = ceil(splat(F32x16, f));
+        const fr16 = @ceil(splat(F32x16, f));
+        try expect(approxEqAbs(vr, fr, 0.0));
+        try expect(approxEqAbs(vr8, fr8, 0.0));
+        try expect(approxEqAbs(vr16, fr16, 0.0));
+        f += 0.12345 * @intToFloat(f32, i);
+    }
+}
+
+/// Limits every lane of `v` to the range given by `vmin` and `vmax`.
+///
+/// The lower bound is applied first with `max`, then the upper bound with
+/// `min`.
+pub inline fn clamp(v: anytype, vmin: anytype, vmax: anytype) @TypeOf(v, vmin, vmax) {
+    const lower_bounded = max(vmin, v);
+    return min(vmax, lower_bounded);
+}
+test "zmath.clamp" {
+    if (builtin.target.os.tag == .macos) return error.SkipZigTest;
+
+    // 4-wide, splatted bounds.
+    {
+        const input = f32x4(-1.0, 0.2, 1.1, -0.3);
+        const got = clamp(input, splat(F32x4, -0.5), splat(F32x4, 0.5));
+        try expect(approxEqAbs(got, f32x4(-0.5, 0.2, 0.5, -0.3), 0.0001));
+    }
+    // 8-wide, splatted bounds.
+    {
+        const input = f32x8(-2.0, 0.25, -0.25, 100.0, -1.0, 0.2, 1.1, -0.3);
+        const got = clamp(input, splat(F32x8, -0.5), splat(F32x8, 0.5));
+        try expect(approxEqAbs(got, f32x8(-0.5, 0.25, -0.25, 0.5, -0.5, 0.2, 0.5, -0.3), 0.0001));
+    }
+    // Infinities and NaNs with per-lane bounds.
+    {
+        const input = f32x4(-math.inf_f32, math.inf_f32, math.nan_f32, math.qnan_f32);
+        const lo = f32x4(-100.0, 0.0, -100.0, 0.0);
+        const hi = f32x4(0.0, 100.0, 0.0, 100.0);
+        try expect(approxEqAbs(clamp(input, lo, hi), f32x4(-100.0, 100.0, -100.0, 0.0), 0.0001));
+    }
+    // Negated NaNs: the lower bound is expected in those lanes.
+    {
+        const input = f32x4(math.inf_f32, math.inf_f32, -math.nan_f32, -math.qnan_f32);
+        const got = clamp(input, splat(F32x4, -1.0), splat(F32x4, 1.0));
+        try expect(approxEqAbs(got, f32x4(1.0, 1.0, -1.0, -1.0), 0.0001));
+    }
+}
+
+/// Variant of `clamp` built on `maxFast` / `minFast`.
+///
+/// The lower bound is applied first, then the upper bound.
+pub inline fn clampFast(v: anytype, vmin: anytype, vmax: anytype) @TypeOf(v, vmin, vmax) {
+    const lower_bounded = maxFast(vmin, v);
+    return minFast(vmax, lower_bounded);
+}
+test "zmath.clampFast" {
+    // Finite inputs only.
+    const input = f32x4(-1.0, 0.2, 1.1, -0.3);
+    const got = clampFast(input, splat(F32x4, -0.5), splat(F32x4, 0.5));
+    try expect(approxEqAbs(got, f32x4(-0.5, 0.2, 0.5, -0.3), 0.0001));
+}
+
+/// Limits every lane of `v` to the range [0.0, 1.0].
+///
+/// Applies `max` against 0.0 first, then `min` against 1.0.
+pub inline fn saturate(v: anytype) @TypeOf(v) {
+    const T = @TypeOf(v);
+    const non_negative = max(v, splat(T, 0.0));
+    return min(non_negative, splat(T, 1.0));
+}
+test "zmath.saturate" {
+    if (builtin.target.os.tag == .macos) return error.SkipZigTest;
+
+    // 4-wide finite values.
+    {
+        const got = saturate(f32x4(-1.0, 0.2, 1.1, -0.3));
+        try expect(approxEqAbs(got, f32x4(0.0, 0.2, 1.0, 0.0), 0.0001));
+    }
+    // 8-wide finite values.
+    {
+        const got = saturate(f32x8(0.0, 0.0, 2.0, -2.0, -1.0, 0.2, 1.1, -0.3));
+        try expect(approxEqAbs(got, f32x8(0.0, 0.0, 1.0, 0.0, 0.0, 0.2, 1.0, 0.0), 0.0001));
+    }
+    // Infinities saturate to the nearest bound; NaN lanes are expected to be 0.
+    {
+        const got = saturate(f32x4(-math.inf_f32, math.inf_f32, math.nan_f32, math.qnan_f32));
+        try expect(approxEqAbs(got, f32x4(0.0, 1.0, 0.0, 0.0), 0.0001));
+    }
+    // Same with negated NaNs.
+    {
+        const got = saturate(f32x4(math.inf_f32, math.inf_f32, -math.nan_f32, -math.qnan_f32));
+        try expect(approxEqAbs(got, f32x4(1.0, 1.0, 0.0, 0.0), 0.0001));
+    }
+}
+
+/// Variant of `saturate` built on `maxFast` / `minFast`.
+///
+/// Applies `maxFast` against 0.0 first, then `minFast` against 1.0.
+pub inline fn saturateFast(v: anytype) @TypeOf(v) {
+    const T = @TypeOf(v);
+    const non_negative = maxFast(v, splat(T, 0.0));
+    return minFast(non_negative, splat(T, 1.0));
+}
+test "zmath.saturateFast" {
+    // 4-wide finite values.
+    {
+        const got = saturateFast(f32x4(-1.0, 0.2, 1.1, -0.3));
+        try expect(approxEqAbs(got, f32x4(0.0, 0.2, 1.0, 0.0), 0.0001));
+    }
+    // 8-wide finite values.
+    {
+        const got = saturateFast(f32x8(0.0, 0.0, 2.0, -2.0, -1.0, 0.2, 1.1, -0.3));
+        try expect(approxEqAbs(got, f32x8(0.0, 0.0, 1.0, 0.0, 0.0, 0.2, 1.0, 0.0), 0.0001));
+    }
+    // Infinities saturate to the nearest bound; NaN lanes are expected to be 0.
+    {
+        const got = saturateFast(f32x4(-math.inf_f32, math.inf_f32, math.nan_f32, math.qnan_f32));
+        try expect(approxEqAbs(got, f32x4(0.0, 1.0, 0.0, 0.0), 0.0001));
+    }
+    // Same with negated NaNs.
+    {
+        const got = saturateFast(f32x4(math.inf_f32, math.inf_f32, -math.nan_f32, -math.qnan_f32));
+        try expect(approxEqAbs(got, f32x4(1.0, 1.0, 0.0, 0.0), 0.0001));
+    }
+}
+
+/// Returns the square root of `v`, computed by the `@sqrt` builtin.
+pub inline fn sqrt(v: anytype) @TypeOf(v) {
+    const root = @sqrt(v); // sqrtps
+    return root;
+}
+
+/// Returns the absolute value of `v`, computed by the `@fabs` builtin.
+pub inline fn abs(v: anytype) @TypeOf(v) {
+    const magnitude = @fabs(v); // load, andps
+    return magnitude;
+}
+
+/// Lane-wise choice between two `f32` vectors via the `@select` builtin:
+/// lanes where `mask` is true come from `v0`, the remaining lanes from `v1`.
+pub inline fn select(mask: anytype, v0: anytype, v1: anytype) @TypeOf(v0, v1) {
+    const chosen = @select(f32, mask, v0, v1);
+    return chosen;
+}
+
+/// Linear interpolation from `v0` to `v1` with a single scalar weight `t`
+/// broadcast to every lane: `v0 + (v1 - v0) * t`.
+pub inline fn lerp(v0: anytype, v1: anytype, t: f32) @TypeOf(v0, v1) {
+    const T = @TypeOf(v0, v1);
+    const delta = v1 - v0;
+    const weight = splat(T, t);
+    return v0 + delta * weight; // subps, shufps, addps, mulps
+}
+
+/// Linear interpolation from `v0` to `v1` where the weight `t` has the same
+/// shape as the operands: `v0 + (v1 - v0) * t`.
+pub inline fn lerpV(v0: anytype, v1: anytype, t: anytype) @TypeOf(v0, v1, t) {
+    const delta = v1 - v0;
+    return v0 + delta * t; // subps, addps, mulps
+}
+
+/// Inverse of `lerp`: for a scalar `t` broadcast to every lane, returns
+/// `(t - v0) / (v1 - v0)`. No guard is applied for lanes where `v1 == v0`.
+pub inline fn lerpInverse(v0: anytype, v1: anytype, t: anytype) @TypeOf(v0, v1) {
+    const T = @TypeOf(v0, v1);
+    const offset = splat(T, t) - v0;
+    const span = v1 - v0;
+    return offset / span;
+}
+
+/// Inverse of `lerpV`: returns `(t - v0) / (v1 - v0)` with `t` of the same
+/// shape as the operands. No guard is applied for `v1 == v0`.
+pub inline fn lerpInverseV(v0: anytype, v1: anytype, t: anytype) @TypeOf(v0, v1, t) {
+    const offset = t - v0;
+    const span = v1 - v0;
+    return offset / span;
+}
+test "zmath.lerpInverse" {
+    // Scalar form: start, end and midpoint of the [10, 100] range.
+    const at_start = lerpInverseV(10.0, 100.0, 10.0);
+    const at_end = lerpInverseV(10.0, 100.0, 100.0);
+    const at_middle = lerpInverseV(10.0, 100.0, 55.0);
+    try expect(math.approxEqAbs(f32, at_start, 0, 0.0005));
+    try expect(math.approxEqAbs(f32, at_end, 1, 0.0005));
+    try expect(math.approxEqAbs(f32, at_middle, 0.5, 0.05));
+
+    // Vector form with a broadcast scalar `t`.
+    const got = lerpInverse(f32x4(0, 0, 10, 10), f32x4(100, 200, 100, 100), 10.0);
+    try expect(approxEqAbs(got, f32x4(0.1, 0.05, 0, 0), 0.0005));
+}
+
+/// Maps every lane of `v` linearly from the range [`min1`, `max1`] to the
+/// range [`min2`, `max2`]. The four bounds are scalars broadcast to the
+/// vector type of `v`. No guard is applied for `max1 == min1`.
+pub inline fn mapLinear(v: anytype, min1: anytype, max1: anytype, min2: anytype, max2: anytype) @TypeOf(v) {
+    const T = @TypeOf(v);
+    const src_min = splat(T, min1);
+    const src_max = splat(T, max1);
+    const dst_min = splat(T, min2);
+    const dst_max = splat(T, max2);
+    const src_span = src_max - src_min;
+    const scaled = (v - src_min) * (dst_max - dst_min);
+    return dst_min + scaled / src_span;
+}
+
+/// Maps `v` linearly from the range [`min1`, `max1`] to the range
+/// [`min2`, `max2`], with every argument used as given (no broadcasting).
+/// No guard is applied for `max1 == min1`.
+pub inline fn mapLinearV(v: anytype, min1: anytype, max1: anytype, min2: anytype, max2: anytype) @TypeOf(v, min1, max1, min2, max2) {
+    const src_span = max1 - min1;
+    const scaled = (v - min1) * (max2 - min2);
+    return min2 + scaled / src_span;
+}
+test "zmath.mapLinear" {
+    // Scalar form: start, end and midpoint of [0, 1.2] mapped onto [10, 100].
+    const at_start = mapLinearV(0, 0, 1.2, 10, 100);
+    const at_end = mapLinearV(1.2, 0, 1.2, 10, 100);
+    const at_middle = mapLinearV(0.6, 0, 1.2, 10, 100);
+    try expect(math.approxEqAbs(f32, at_start, 10, 0.0005));
+    try expect(math.approxEqAbs(f32, at_end, 100, 0.0005));
+    try expect(math.approxEqAbs(f32, at_middle, 55, 0.0005));
+
+    // All-vector form.
+    const all_vectors = mapLinearV(splat(F32x4, 0), splat(F32x4, 0), splat(F32x4, 1.2), splat(F32x4, 10), splat(F32x4, 100));
+    try expect(approxEqAbs(all_vectors, splat(F32x4, 10), 0.0005));
+
+    // Vector input with scalar bounds.
+    const scalar_bounds = mapLinear(f32x4(0, 0, 0.6, 1.2), 0, 1.2, 10, 100);
+    try expect(approxEqAbs(scalar_bounds, f32x4(10, 10, 55, 100), 0.0005));
+}
+
+/// Names the four lanes of an `F32x4`. `swizzle` converts these tags to lane
+/// indices with `@enumToInt`, so declaration order (x=0 .. w=3) matters.
+pub const F32x4Component = enum {
+    x,
+    y,
+    z,
+    w,
+};
+
+/// Rearranges the lanes of `v`: output lane 0 takes the component named by
+/// `x`, lane 1 the one named by `y`, and so on. The selectors are compile-time
+/// values and are lowered to a single `@shuffle`.
+pub inline fn swizzle(
+    v: F32x4,
+    comptime x: F32x4Component,
+    comptime y: F32x4Component,
+    comptime z: F32x4Component,
+    comptime w: F32x4Component,
+) F32x4 {
+    // Non-negative shuffle indices pick lanes from the first operand, so the
+    // second operand is never read and may stay undefined.
+    const lane_indices = [4]i32{
+        @enumToInt(x),
+        @enumToInt(y),
+        @enumToInt(z),
+        @enumToInt(w),
+    };
+    return @shuffle(f32, v, undefined, lane_indices);
+}
+
+/// Floating-point remainder of `v0 / v1`, computed as
+/// `v0 - v1 * trunc(v0 / v1)`. Because the quotient is truncated, the test
+/// cases show the result taking the sign of `v0`.
+pub inline fn mod(v0: anytype, v1: anytype) @TypeOf(v0, v1) {
+    // vdivps, vroundps, vmulps, vsubps
+    const whole_quotient = trunc(v0 / v1);
+    return v0 - v1 * whole_quotient;
+}
+test "zmath.mod" {
+    if (builtin.target.os.tag == .macos and builtin.zig_backend != .stage1) return error.SkipZigTest;
+
+    // Finite operands: the result carries the sign of the dividend.
+    try expect(approxEqAbs(mod(splat(F32x4, 3.1), splat(F32x4, 1.7)), splat(F32x4, 1.4), 0.0005));
+    try expect(approxEqAbs(mod(splat(F32x4, -3.0), splat(F32x4, 2.0)), splat(F32x4, -1.0), 0.0005));
+    try expect(approxEqAbs(mod(splat(F32x4, -3.0), splat(F32x4, -2.0)), splat(F32x4, -1.0), 0.0005));
+    try expect(approxEqAbs(mod(splat(F32x4, 3.0), splat(F32x4, -2.0)), splat(F32x4, 1.0), 0.0005));
+
+    // Any infinity or NaN on either side is expected to give NaN.
+    const pos_inf = splat(F32x4, math.inf_f32);
+    const neg_inf = splat(F32x4, -math.inf_f32);
+    const nan = splat(F32x4, math.nan_f32);
+    const qnan = splat(F32x4, math.qnan_f32);
+    const neg_qnan = splat(F32x4, -math.qnan_f32);
+    const finite = splat(F32x4, 123.456);
+
+    try expect(all(isNan(mod(pos_inf, splat(F32x4, 1.0))), 0));
+    try expect(all(isNan(mod(neg_inf, finite)), 0));
+    try expect(all(isNan(mod(nan, finite)), 0));
+    try expect(all(isNan(mod(qnan, finite)), 0));
+    try expect(all(isNan(mod(neg_qnan, finite)), 0));
+    try expect(all(isNan(mod(finite, pos_inf)), 0));
+    try expect(all(isNan(mod(finite, neg_inf)), 0));
+    try expect(all(isNan(mod(pos_inf, pos_inf)), 0));
+    try expect(all(isNan(mod(finite, nan)), 0));
+    try expect(all(isNan(mod(pos_inf, nan)), 0));
+}
+
+/// Type dispatcher for angle wrapping: scalars go to `modAngle32`, the three
+/// supported vector widths go to `modAngle32xN`, and anything else is a
+/// compile error.
+pub fn modAngle(v: anytype) @TypeOf(v) {
+    const T = @TypeOf(v);
+    if (T == f32) {
+        return modAngle32(v);
+    } else if (T == F32x4 or T == F32x8 or T == F32x16) {
+        return modAngle32xN(v);
+    } else {
+        @compileError("zmath.modAngle() not implemented for " ++ @typeName(T));
+    }
+}
+
+/// Wraps every lane of `v` by subtracting the nearest whole multiple of tau:
+/// `v - tau * round(v / tau)`. The division is done as a multiplication by
+/// `1.0 / tau`.
+pub inline fn modAngle32xN(v: anytype) @TypeOf(v) {
+    const T = @TypeOf(v);
+    // 2 x vmulps, 2 x load, vroundps, vaddps
+    const whole_turns = round(v * splat(T, 1.0 / math.tau));
+    return v - splat(T, math.tau) * whole_turns;
+}
+test "zmath.modAngle" {
+    const tolerance = 0.0005;
+
+    // Whole turns collapse to zero.
+    try expect(approxEqAbs(modAngle(splat(F32x4, math.tau)), splat(F32x4, 0.0), tolerance));
+    try expect(approxEqAbs(modAngle(splat(F32x4, 0.0)), splat(F32x4, 0.0), tolerance));
+
+    // Half turns.
+    try expect(approxEqAbs(modAngle(splat(F32x4, math.pi)), splat(F32x4, math.pi), tolerance));
+    try expect(approxEqAbs(modAngle(splat(F32x4, 11 * math.pi)), splat(F32x4, math.pi), tolerance));
+
+    // Quarter turns on either side of a whole turn.
+    try expect(approxEqAbs(modAngle(splat(F32x4, 3.5 * math.pi)), splat(F32x4, -0.5 * math.pi), tolerance));
+    try expect(approxEqAbs(modAngle(splat(F32x4, 2.5 * math.pi)), splat(F32x4, 0.5 * math.pi), tolerance));
+}
+
+/// Computes `v0 * v1 + v2`.
+///
+/// The fused `@mulAdd` builtin is used only when the `zmath_options` build
+/// option `enable_cross_platform_determinism` is off and the target is x86_64
+/// with both AVX and FMA. In every other configuration a separate multiply
+/// and add are emitted.
+pub inline fn mulAdd(v0: anytype, v1: anytype, v2: anytype) @TypeOf(v0, v1, v2) {
+    const T = @TypeOf(v0, v1, v2);
+    const deterministic = @import("zmath_options").enable_cross_platform_determinism;
+    const use_hw_fma = !deterministic and cpu_arch == .x86_64 and has_avx and has_fma;
+    if (use_hw_fma) {
+        return @mulAdd(T, v0, v1, v2);
+    } else {
+        // Two reasons to land here:
+        //  - determinism was requested, so the compiler must generate a mul,
+        //    add sequence (no fma even if the target supports it);
+        //  - NOTE(mziulek): On .x86_64 without HW fma instructions @mulAdd
+        //    maps to really slow code!
+        return v0 * v1 + v2;
+    }
+}
+
+/// Vector sine using an 11-degree minimax polynomial.
+///
+/// The input is first wrapped with `modAngle`. Lanes whose magnitude exceeds
+/// pi/2 are then reflected to `sign(x) * pi - x` before the odd polynomial in
+/// `x` is evaluated with Horner's scheme.
+/// NOTE(review): `andInt` / `orInt` / `andNotInt` are presumably bitwise
+/// operations on the float bit patterns; confirm against their definitions.
+fn sin32xN(v: anytype) @TypeOf(v) {
+    // 11-degree minimax approximation
+    const T = @TypeOf(v);
+
+    const wrapped = modAngle(v);
+    const sign_bit = andInt(wrapped, splatNegativeZero(T));
+    const signed_pi = orInt(sign_bit, splat(T, math.pi));
+    const magnitude = andNotInt(sign_bit, wrapped);
+    const reflected = signed_pi - wrapped;
+    const in_range = magnitude <= splat(T, 0.5 * math.pi);
+    const x = select(in_range, wrapped, reflected);
+    const x2 = x * x;
+
+    const p0 = mulAdd(splat(T, -2.3889859e-08), x2, splat(T, 2.7525562e-06));
+    const p1 = mulAdd(p0, x2, splat(T, -0.00019840874));
+    const p2 = mulAdd(p1, x2, splat(T, 0.0083333310));
+    const p3 = mulAdd(p2, x2, splat(T, -0.16666667));
+    const p4 = mulAdd(p3, x2, splat(T, 1.0));
+    return x * p4;
+}
+test "zmath.sin" {
+    const tolerance = 0.0001;
+
+    // Hand-picked values at all three vector widths.
+    try expect(approxEqAbs(sin(splat(F32x4, 0.5 * math.pi)), splat(F32x4, 1.0), tolerance));
+    try expect(approxEqAbs(sin(splat(F32x4, 0.0)), splat(F32x4, 0.0), tolerance));
+    try expect(approxEqAbs(sin(splat(F32x4, -0.0)), splat(F32x4, -0.0), tolerance));
+    try expect(approxEqAbs(sin(splat(F32x4, 89.123)), splat(F32x4, 0.916166), tolerance));
+    try expect(approxEqAbs(sin(splat(F32x8, 89.123)), splat(F32x8, 0.916166), tolerance));
+    try expect(approxEqAbs(sin(splat(F32x16, 89.123)), splat(F32x16, 0.916166), tolerance));
+
+    // Non-finite inputs are expected to give NaN.
+    try expect(all(isNan(sin(splat(F32x4, math.inf_f32))), 0) == true);
+    try expect(all(isNan(sin(splat(F32x4, -math.inf_f32))), 0) == true);
+    try expect(all(isNan(sin(splat(F32x4, math.nan_f32))), 0) == true);
+    try expect(all(isNan(sin(splat(F32x4, math.qnan_f32))), 0) == true);
+
+    // Sweep a range of angles and compare against the `@sin` builtin.
+    var angle: f32 = -100.0;
+    var step: u32 = 0;
+    while (step < 100) : (step += 1) {
+        const got4 = sin(splat(F32x4, angle));
+        const want4 = @sin(splat(F32x4, angle));
+        const got8 = sin(splat(F32x8, angle));
+        const want8 = @sin(splat(F32x8, angle));
+        const got16 = sin(splat(F32x16, angle));
+        const want16 = @sin(splat(F32x16, angle));
+        try expect(approxEqAbs(got4, want4, tolerance));
+        try expect(approxEqAbs(got8, want8, tolerance));
+        try expect(approxEqAbs(got16, want16, tolerance));
+        angle += 0.12345 * @intToFloat(f32, step);
+    }
+}
+
+/// Vector cosine using a 10-degree minimax polynomial.
+///
+/// The input is first wrapped with `modAngle`. Lanes whose magnitude exceeds
+/// pi/2 are reflected to `sign(x) * pi - x` and their result is negated; the
+/// even polynomial in `x` is evaluated with Horner's scheme.
+/// NOTE(review): `andInt` / `orInt` / `andNotInt` are presumably bitwise
+/// operations on the float bit patterns; confirm against their definitions.
+fn cos32xN(v: anytype) @TypeOf(v) {
+    // 10-degree minimax approximation
+    const T = @TypeOf(v);
+
+    const wrapped = modAngle(v);
+    const sign_bit = andInt(wrapped, splatNegativeZero(T));
+    const signed_pi = orInt(sign_bit, splat(T, math.pi));
+    const magnitude = andNotInt(sign_bit, wrapped);
+    const reflected = signed_pi - wrapped;
+    const in_range = magnitude <= splat(T, 0.5 * math.pi);
+    const x = select(in_range, wrapped, reflected);
+    // +1 for lanes kept as-is, -1 for lanes that were reflected.
+    const result_sign = select(in_range, splat(T, 1.0), splat(T, -1.0));
+    const x2 = x * x;
+
+    const p0 = mulAdd(splat(T, -2.6051615e-07), x2, splat(T, 2.4760495e-05));
+    const p1 = mulAdd(p0, x2, splat(T, -0.0013888378));
+    const p2 = mulAdd(p1, x2, splat(T, 0.041666638));
+    const p3 = mulAdd(p2, x2, splat(T, -0.5));
+    const p4 = mulAdd(p3, x2, splat(T, 1.0));
+    return result_sign * p4;
+}
+test "zmath.cos" {
+    const tolerance = 0.0001;
+
+    // Hand-picked values.
+    try expect(approxEqAbs(cos(splat(F32x4, 0.5 * math.pi)), splat(F32x4, 0.0), tolerance));
+    try expect(approxEqAbs(cos(splat(F32x4, 0.0)), splat(F32x4, 1.0), tolerance));
+    try expect(approxEqAbs(cos(splat(F32x4, -0.0)), splat(F32x4, 1.0), tolerance));
+
+    // Non-finite inputs are expected to give NaN.
+    try expect(all(isNan(cos(splat(F32x4, math.inf_f32))), 0) == true);
+    try expect(all(isNan(cos(splat(F32x4, -math.inf_f32))), 0) == true);
+    try expect(all(isNan(cos(splat(F32x4, math.nan_f32))), 0) == true);
+    try expect(all(isNan(cos(splat(F32x4, math.qnan_f32))), 0) == true);
+
+    // Sweep a range of angles and compare against the `@cos` builtin.
+    var angle: f32 = -100.0;
+    var step: u32 = 0;
+    while (step < 100) : (step += 1) {
+        const got4 = cos(splat(F32x4, angle));
+        const want4 = @cos(splat(F32x4, angle));
+        const got8 = cos(splat(F32x8, angle));
+        const want8 = @cos(splat(F32x8, angle));
+        const got16 = cos(splat(F32x16, angle));
+        const want16 = @cos(splat(F32x16, angle));
+        try expect(approxEqAbs(got4, want4, tolerance));
+        try expect(approxEqAbs(got8, want8, tolerance));
+        try expect(approxEqAbs(got16, want16, tolerance));
+        angle += 0.12345 * @intToFloat(f32, step);
+    }
+}
+
+/// Type dispatcher for sine: scalars go to `sin32`, the three supported
+/// vector widths go to `sin32xN`, and anything else is a compile error.
+pub fn sin(v: anytype) @TypeOf(v) {
+    const T = @TypeOf(v);
+    if (T == f32) {
+        return sin32(v);
+    } else if (T == F32x4 or T == F32x8 or T == F32x16) {
+        return sin32xN(v);
+    } else {
+        @compileError("zmath.sin() not implemented for " ++ @typeName(T));
+    }
+}
+
+/// Type dispatcher for cosine: scalars go to `cos32`, the three supported
+/// vector widths go to `cos32xN`, and anything else is a compile error.
+pub fn cos(v: anytype) @TypeOf(v) {
+    const T = @TypeOf(v);
+    if (T == f32) {
+        return cos32(v);
+    } else if (T == F32x4 or T == F32x8 or T == F32x16) {
+        return cos32xN(v);
+    } else {
+        @compileError("zmath.cos() not implemented for " ++ @typeName(T));
+    }
+}
+
+/// Type dispatcher for combined sine/cosine. Returns a two-element array;
+/// the vector implementation in this file puts sine at index 0 and cosine at
+/// index 1. Scalars go to `sincos32`, the three supported vector widths go to
+/// `sincos32xN`, and anything else is a compile error.
+pub fn sincos(v: anytype) [2]@TypeOf(v) {
+    const T = @TypeOf(v);
+    if (T == f32) {
+        return sincos32(v);
+    } else if (T == F32x4 or T == F32x8 or T == F32x16) {
+        return sincos32xN(v);
+    } else {
+        @compileError("zmath.sincos() not implemented for " ++ @typeName(T));
+    }
+}
+
+/// Type dispatcher for arcsine: scalars go to `asin32`, the three supported
+/// vector widths go to `asin32xN`, and anything else is a compile error.
+pub fn asin(v: anytype) @TypeOf(v) {
+    const T = @TypeOf(v);
+    if (T == f32) {
+        return asin32(v);
+    } else if (T == F32x4 or T == F32x8 or T == F32x16) {
+        return asin32xN(v);
+    } else {
+        @compileError("zmath.asin() not implemented for " ++ @typeName(T));
+    }
+}
+
+/// Type dispatcher for arccosine: scalars go to `acos32`, the three supported
+/// vector widths go to `acos32xN`, and anything else is a compile error.
+pub fn acos(v: anytype) @TypeOf(v) {
+    const T = @TypeOf(v);
+    if (T == f32) {
+        return acos32(v);
+    } else if (T == F32x4 or T == F32x8 or T == F32x16) {
+        return acos32xN(v);
+    } else {
+        @compileError("zmath.acos() not implemented for " ++ @typeName(T));
+    }
+}
+
+/// Computes sine and cosine of every lane of `v` in one pass and returns them
+/// as `.{ sine, cosine }`.
+///
+/// The range reduction is shared by both results: the input is wrapped with
+/// `modAngle`, and lanes whose magnitude exceeds pi/2 are reflected to
+/// `sign(x) * pi - x`. The cosine of reflected lanes is negated. The
+/// polynomial coefficients are the same as in `sin32xN` and `cos32xN`.
+fn sincos32xN(v: anytype) [2]@TypeOf(v) {
+    const T = @TypeOf(v);
+
+    const wrapped = modAngle(v);
+    const sign_bit = andInt(wrapped, splatNegativeZero(T));
+    const signed_pi = orInt(sign_bit, splat(T, math.pi));
+    const magnitude = andNotInt(sign_bit, wrapped);
+    const reflected = signed_pi - wrapped;
+    const in_range = magnitude <= splat(T, 0.5 * math.pi);
+    const x = select(in_range, wrapped, reflected);
+    // +1 for lanes kept as-is, -1 for lanes that were reflected.
+    const cos_sign = select(in_range, splat(T, 1.0), splat(T, -1.0));
+    const x2 = x * x;
+
+    // Sine: odd polynomial, Horner's scheme in x^2, multiplied by x.
+    const s0 = mulAdd(splat(T, -2.3889859e-08), x2, splat(T, 2.7525562e-06));
+    const s1 = mulAdd(s0, x2, splat(T, -0.00019840874));
+    const s2 = mulAdd(s1, x2, splat(T, 0.0083333310));
+    const s3 = mulAdd(s2, x2, splat(T, -0.16666667));
+    const sine = x * mulAdd(s3, x2, splat(T, 1.0));
+
+    // Cosine: even polynomial, Horner's scheme in x^2, with the sign fix-up.
+    const c0 = mulAdd(splat(T, -2.6051615e-07), x2, splat(T, 2.4760495e-05));
+    const c1 = mulAdd(c0, x2, splat(T, -0.0013888378));
+    const c2 = mulAdd(c1, x2, splat(T, 0.041666638));
+    const c3 = mulAdd(c2, x2, splat(T, -0.5));
+    const cosine = cos_sign * mulAdd(c3, x2, splat(T, 1.0));
+
+    return .{ sine, cosine };
+}
+test "zmath.sincos32xN" {
+    // Compares sincos() with the @sin/@cos builtins for every supported vector
+    // width, at 100 angles starting from -100.0 with a growing step.
+    const epsilon = 0.0001;
+
+    var angle: f32 = -100.0;
+    var iteration: u32 = 0;
+    while (iteration < 100) : (iteration += 1) {
+        inline for (.{ F32x4, F32x8, F32x16 }) |V| {
+            const input = splat(V, angle);
+            const got = sincos(input);
+            try expect(approxEqAbs(got[0], @sin(input), epsilon));
+            try expect(approxEqAbs(got[1], @cos(input), epsilon));
+        }
+        angle += 0.12345 * @intToFloat(f32, iteration);
+    }
+}
+
+/// Vector arcsine, computed as `pi/2 - acos32xN(v)`.
+/// `acos32xN` evaluates the 7-degree minimax approximation on |v| and selects
+/// the branch by the sign of `v`; subtracting its result from pi/2 is the same
+/// final step the arcsine needs, so the polynomial lives in one place.
+fn asin32xN(v: anytype) @TypeOf(v) {
+    const T = @TypeOf(v);
+    const half_pi = splat(T, 0.5 * math.pi);
+    return half_pi - acos32xN(v);
+}
+
+/// Vector arccosine using a 7-degree minimax approximation.
+/// The polynomial is evaluated on |v| and scaled by sqrt(max(0, 1 - |v|));
+/// lanes where `v >= 0` is false return pi minus that value.
+fn acos32xN(v: anytype) @TypeOf(v) {
+    const T = @TypeOf(v);
+
+    const magnitude = abs(v);
+    // Clamped at zero so that |v| slightly above 1 does not reach sqrt as a negative.
+    const one_minus_magnitude = splat(T, 1.0) - magnitude;
+    const root = sqrt(maxFast(splat(T, 0.0), one_minus_magnitude));
+
+    // Horner evaluation of the polynomial in |v|.
+    var poly = mulAdd(splat(T, -0.0012624911), magnitude, splat(T, 0.0066700901));
+    poly = mulAdd(poly, magnitude, splat(T, -0.0170881256));
+    poly = mulAdd(poly, magnitude, splat(T, 0.0308918810));
+    poly = mulAdd(poly, magnitude, splat(T, -0.0501743046));
+    poly = mulAdd(poly, magnitude, splat(T, 0.0889789874));
+    poly = mulAdd(poly, magnitude, splat(T, -0.2145988016));
+    const for_nonnegative = root * mulAdd(poly, magnitude, splat(T, 1.5707963050));
+
+    const for_negative = splat(T, math.pi) - for_nonnegative;
+    return select(v >= splat(T, 0.0), for_nonnegative, for_negative);
+}
+
+/// Vector arctangent using a 17-degree minimax approximation.
+/// Lanes with |v| <= 1 evaluate the polynomial on `v` directly. All other
+/// lanes evaluate it on `1 / v` and return `(+/-)pi/2 - poly`, with the sign
+/// chosen by whether `v > 1`.
+pub fn atan(v: anytype) @TypeOf(v) {
+    const T = @TypeOf(v);
+    const one = splat(T, 1.0);
+    const zero = splat(T, 0.0);
+
+    const reciprocal = one / v;
+    const in_unit_range = abs(v) <= one;
+    // 0 marks "no correction needed"; otherwise +1 for v > 1 and -1 for the rest.
+    const half_pi_sign = select(in_unit_range, zero, select(v > one, one, splat(T, -1.0)));
+    const x = select(in_unit_range, v, reciprocal);
+    const x2 = x * x;
+
+    // Horner evaluation of the odd polynomial in x^2.
+    var poly = mulAdd(splat(T, 0.0028662257), x2, splat(T, -0.0161657367));
+    poly = mulAdd(poly, x2, splat(T, 0.0429096138));
+    poly = mulAdd(poly, x2, splat(T, -0.0752896400));
+    poly = mulAdd(poly, x2, splat(T, 0.1065626393));
+    poly = mulAdd(poly, x2, splat(T, -0.1420889944));
+    poly = mulAdd(poly, x2, splat(T, 0.1999355085));
+    poly = mulAdd(poly, x2, splat(T, -0.3333314528));
+    const direct = x * mulAdd(poly, x2, one);
+
+    const corrected = half_pi_sign * splat(T, 0.5 * math.pi) - direct;
+    return select(half_pi_sign == zero, direct, corrected);
+}
+test "zmath.atan" {
+    const epsilon = 0.0001;
+    // 4 lanes: values on both sides of the |v| <= 1 split.
+    {
+        const input = f32x4(0.25, 0.5, 1.0, 1.25);
+        const want = f32x4(math.atan(input[0]), math.atan(input[1]), math.atan(input[2]), math.atan(input[3]));
+        try expect(approxEqAbs(want, atan(input), epsilon));
+    }
+    // 8 lanes: adds negative and large-magnitude inputs.
+    {
+        const input = f32x8(-0.25, 0.5, -1.0, 1.25, 100.0, -200.0, 300.0, 400.0);
+        // zig fmt: off
+        const want = f32x8(
+            math.atan(input[0]), math.atan(input[1]), math.atan(input[2]), math.atan(input[3]),
+            math.atan(input[4]), math.atan(input[5]), math.atan(input[6]), math.atan(input[7]),
+        );
+        // zig fmt: on
+        try expect(approxEqAbs(want, atan(input), epsilon));
+    }
+    // 16 lanes: adds both signed zeros and small magnitudes.
+    {
+        // zig fmt: off
+        const input = f32x16(
+            -0.25, 0.5, -1.0, 0.0, 0.1, -0.2, 30.0, 400.0,
+            -0.25, 0.5, -1.0, -0.0, -0.05, -0.125, 0.0625, 4000.0
+        );
+        const want = f32x16(
+            math.atan(input[0]), math.atan(input[1]), math.atan(input[2]), math.atan(input[3]),
+            math.atan(input[4]), math.atan(input[5]), math.atan(input[6]), math.atan(input[7]),
+            math.atan(input[8]), math.atan(input[9]), math.atan(input[10]), math.atan(input[11]),
+            math.atan(input[12]), math.atan(input[13]), math.atan(input[14]), math.atan(input[15]),
+        );
+        // zig fmt: on
+        try expect(approxEqAbs(want, atan(input), epsilon));
+    }
+    // Infinities map to +/-pi/2; NaN inputs stay NaN.
+    {
+        const pos_inf = splat(F32x4, math.inf_f32);
+        const neg_inf = splat(F32x4, -math.inf_f32);
+        try expect(approxEqAbs(atan(pos_inf), splat(F32x4, 0.5 * math.pi), epsilon));
+        try expect(approxEqAbs(atan(neg_inf), splat(F32x4, -0.5 * math.pi), epsilon));
+        try expect(all(isNan(atan(splat(F32x4, math.nan_f32))), 0));
+        try expect(all(isNan(atan(splat(F32x4, -math.nan_f32))), 0));
+    }
+}
+
+/// Vector two-argument arctangent of `vy / vx`.
+/// Zero and infinite operands are resolved first through a chain of selects;
+/// lanes that hit none of those cases carry an all-ones bit pattern and are
+/// filled from `atan(vy / vx)` plus a correction that depends on the sign bit
+/// of `vx`.
+pub fn atan2(vy: anytype, vx: anytype) @TypeOf(vx, vy) {
+    const T = @TypeOf(vx, vy);
+    const lane_count = comptime veclen(T);
+    const Bits = @Vector(lane_count, u32);
+
+    // True where the sign bit of vx is clear (so +0.0 counts, -0.0 does not).
+    const x_sign_clear =
+        (@bitCast(Bits, vx) & @splat(lane_count, @as(u32, 0x8000_0000))) == @splat(lane_count, @as(u32, 0));
+
+    // Constants carrying the sign bit of vy.
+    const y_sign = andInt(vy, splatNegativeZero(T));
+    const signed_quarter_pi = orInt(y_sign, splat(T, 0.25 * math.pi));
+    const signed_half_pi = orInt(y_sign, splat(T, 0.50 * math.pi));
+    const signed_three_quarter_pi = orInt(y_sign, splat(T, 0.75 * math.pi));
+    const signed_pi = orInt(y_sign, splat(T, 1.00 * math.pi));
+
+    // vy == 0: signed zero or signed pi, depending on the sign bit of vx.
+    const y_zero_case = select(x_sign_clear, y_sign, signed_pi);
+    // vy != 0: signed pi/2 when vx == 0, otherwise the "not special" marker.
+    const x_zero_case = select(vx == splat(T, 0.0), signed_half_pi, splatInt(T, 0xffff_ffff));
+    const finite_y_case = select(vy == splat(T, 0.0), y_zero_case, x_zero_case);
+    // vy infinite: pi/4 or 3pi/4 when vx is infinite too, pi/2 otherwise.
+    const both_infinite_case = select(x_sign_clear, signed_quarter_pi, signed_three_quarter_pi);
+    const infinite_y_case = select(isInf(vx), both_infinite_case, signed_half_pi);
+    const special = select(isInf(vy), infinite_y_case, finite_y_case);
+    const needs_general_path = @bitCast(Bits, special) == @splat(lane_count, @as(u32, 0xffff_ffff));
+
+    // General path: atan of the quotient, shifted by signed pi when the sign bit of vx is set.
+    const quotient_atan = atan(vy / vx);
+    const quadrant_offset = select(x_sign_clear, splatNegativeZero(T), signed_pi);
+    const general = quotient_atan + quadrant_offset;
+
+    return select(needs_general_path, general, special);
+}
+test "zmath.atan2" {
+    // From DirectXMath XMVectorATan2():
+    //
+    // Return the inverse tangent of Y / X in the range of -Pi to Pi with the following exceptions:
+    //
+    //     Y == 0 and X is Negative         -> Pi with the sign of Y
+    //     Y == 0 and X is Positive         -> 0 with the sign of Y
+    //     Y != 0 and X == 0                -> Pi / 2 with the sign of Y
+    //     Y != 0 and X is Negative         -> atan(Y / X) + (Pi with the sign of Y)
+    //     X == -Infinity and Finite Y      -> Pi with the sign of Y
+    //     X == +Infinity and Finite Y      -> 0 with the sign of Y
+    //     Y == Infinity and X is Finite    -> Pi / 2 with the sign of Y
+    //     Y == Infinity and X == -Infinity -> 3Pi / 4 with the sign of Y
+    //     Y == Infinity and X == +Infinity -> Pi / 4 with the sign of Y
+
+    const epsilon = 0.0001;
+    // Y == 0 with negative X, then Y != 0 with X == 0.
+    try expect(approxEqAbs(atan2(splat(F32x4, 0.0), splat(F32x4, -1.0)), splat(F32x4, math.pi), epsilon));
+    try expect(approxEqAbs(atan2(splat(F32x4, -0.0), splat(F32x4, -1.0)), splat(F32x4, -math.pi), epsilon));
+    try expect(approxEqAbs(atan2(splat(F32x4, 1.0), splat(F32x4, 0.0)), splat(F32x4, 0.5 * math.pi), epsilon));
+    try expect(approxEqAbs(atan2(splat(F32x4, -1.0), splat(F32x4, 0.0)), splat(F32x4, -0.5 * math.pi), epsilon));
+    // Y != 0 with negative X: atan(Y / X) shifted by Pi with the sign of Y.
+    try expect(approxEqAbs(
+        atan2(splat(F32x4, 1.0), splat(F32x4, -1.0)),
+        splat(F32x4, math.atan(@as(f32, -1.0)) + math.pi),
+        epsilon,
+    ));
+    try expect(approxEqAbs(
+        atan2(splat(F32x4, -10.0), splat(F32x4, -2.0)),
+        splat(F32x4, math.atan(@as(f32, 5.0)) - math.pi),
+        epsilon,
+    ));
+    // Infinite X with finite Y.
+    try expect(approxEqAbs(atan2(splat(F32x4, 1.0), splat(F32x4, -math.inf_f32)), splat(F32x4, math.pi), epsilon));
+    try expect(approxEqAbs(atan2(splat(F32x4, -1.0), splat(F32x4, -math.inf_f32)), splat(F32x4, -math.pi), epsilon));
+    try expect(approxEqAbs(atan2(splat(F32x4, 1.0), splat(F32x4, math.inf_f32)), splat(F32x4, 0.0), epsilon));
+    try expect(approxEqAbs(atan2(splat(F32x4, -1.0), splat(F32x4, math.inf_f32)), splat(F32x4, -0.0), epsilon));
+    // Infinite Y with finite X.
+    try expect(approxEqAbs(
+        atan2(splat(F32x4, math.inf_f32), splat(F32x4, 2.0)),
+        splat(F32x4, 0.5 * math.pi),
+        epsilon,
+    ));
+    try expect(approxEqAbs(
+        atan2(splat(F32x4, -math.inf_f32), splat(F32x4, 2.0)),
+        splat(F32x4, -0.5 * math.pi),
+        epsilon,
+    ));
+    // Both operands infinite.
+    try expect(approxEqAbs(
+        atan2(splat(F32x4, math.inf_f32), splat(F32x4, -math.inf_f32)),
+        splat(F32x4, 0.75 * math.pi),
+        epsilon,
+    ));
+    try expect(approxEqAbs(
+        atan2(splat(F32x4, -math.inf_f32), splat(F32x4, -math.inf_f32)),
+        splat(F32x4, -0.75 * math.pi),
+        epsilon,
+    ));
+    try expect(approxEqAbs(
+        atan2(splat(F32x4, math.inf_f32), splat(F32x4, math.inf_f32)),
+        splat(F32x4, 0.25 * math.pi),
+        epsilon,
+    ));
+    try expect(approxEqAbs(
+        atan2(splat(F32x4, -math.inf_f32), splat(F32x4, math.inf_f32)),
+        splat(F32x4, -0.25 * math.pi),
+        epsilon,
+    ));
+    // Eight different cases in one F32x8 call, one per lane.
+    try expect(approxEqAbs(
+        atan2(
+            f32x8(0.0, -math.inf_f32, -0.0, 2.0, math.inf_f32, math.inf_f32, 1.0, -math.inf_f32),
+            f32x8(-2.0, math.inf_f32, 1.0, 0.0, 10.0, -math.inf_f32, 1.0, -math.inf_f32),
+        ),
+        f32x8(
+            math.pi,
+            -0.25 * math.pi,
+            -0.0,
+            0.5 * math.pi,
+            0.5 * math.pi,
+            0.75 * math.pi,
+            math.atan(@as(f32, 1.0)),
+            -0.75 * math.pi,
+        ),
+        epsilon,
+    ));
+    // Both operands zero; approxEqAbs does not tell +0.0 from -0.0 here.
+    try expect(approxEqAbs(atan2(splat(F32x4, 0.0), splat(F32x4, 0.0)), splat(F32x4, 0.0), epsilon));
+    try expect(approxEqAbs(atan2(splat(F32x4, -0.0), splat(F32x4, 0.0)), splat(F32x4, 0.0), epsilon));
+    // A NaN in either operand yields NaN.
+    try expect(all(isNan(atan2(splat(F32x4, 1.0), splat(F32x4, math.nan_f32))), 0) == true);
+    try expect(all(isNan(atan2(splat(F32x4, -1.0), splat(F32x4, math.nan_f32))), 0) == true);
+    try expect(all(isNan(atan2(splat(F32x4, math.nan_f32), splat(F32x4, -1.0))), 0) == true);
+    try expect(all(isNan(atan2(splat(F32x4, -math.nan_f32), splat(F32x4, 1.0))), 0) == true);
+}
+// ------------------------------------------------------------------------------
+//
+// 3. 2D, 3D, 4D vector functions
+//
+// ------------------------------------------------------------------------------
+/// Dot product of the x and y lanes of `v0` and `v1`, broadcast to all four lanes.
+/// The z and w lanes of the inputs do not contribute.
+pub inline fn dot2(v0: Vec, v1: Vec) F32x4 {
+    const products = v0 * v1; // | x0*x1 | y0*y1 | -- | -- |
+    const y_first = swizzle(products, .y, .x, .x, .x); // | y0*y1 | -- | -- | -- |
+    // Scalar add in lane x only; the remaining lanes are passed through and then discarded.
+    const summed = f32x4(products[0] + y_first[0], products[1], products[2], products[3]);
+    return swizzle(summed, .x, .x, .x, .x);
+}
+test "zmath.dot2" {
+    // Only x and y contribute: -1*4 + 2*5 = 6. The z/w values are large enough
+    // to be noticed if they leaked into the sum.
+    const lhs = f32x4(-1.0, 2.0, 300.0, -2.0);
+    const rhs = f32x4(4.0, 5.0, 600.0, 2.0);
+    const got = dot2(lhs, rhs);
+    try expect(approxEqAbs(got, splat(F32x4, 6.0), 0.0001));
+}
+
+/// Dot product of the x, y and z lanes of `v0` and `v1`, passed through `f32x4s`
+/// to produce the returned vector. The w lanes do not contribute.
+pub inline fn dot3(v0: Vec, v1: Vec) F32x4 {
+    const products = v0 * v1;
+    const xy_sum = products[0] + products[1];
+    return f32x4s(xy_sum + products[2]);
+}
+test "zmath.dot3" {
+    // -1*4 + 2*5 + 3*6 = 24; the w lanes (1.0 * 1.0) must not be added.
+    const lhs = f32x4(-1.0, 2.0, 3.0, 1.0);
+    const rhs = f32x4(4.0, 5.0, 6.0, 1.0);
+    const got = dot3(lhs, rhs);
+    try expect(approxEqAbs(got, splat(F32x4, 24.0), 0.0001));
+}
+
+/// Dot product of all four lanes of `v0` and `v1`, broadcast to all four lanes.
+/// The sum is formed pairwise: (z*z' + w*w') + (x*x' + y*y').
+pub inline fn dot4(v0: Vec, v1: Vec) F32x4 {
+    const products = v0 * v1; // | x0*x1 | y0*y1 | z0*z1 | w0*w1 |
+    const neighbours = swizzle(products, .y, .x, .w, .x); // | y0*y1 | -- | w0*w1 | -- |
+    const pair_sums = products + neighbours; // | x0*x1 + y0*y1 | -- | z0*z1 + w0*w1 | -- |
+    const zw_first = swizzle(pair_sums, .z, .x, .x, .x); // | z0*z1 + w0*w1 | -- | -- | -- |
+    // Scalar add in lane x (addss); only lane x is read by the final broadcast.
+    const total = f32x4(zw_first[0] + pair_sums[0], zw_first[1], zw_first[2], zw_first[2]);
+    return swizzle(total, .x, .x, .x, .x);
+}
+test "zmath.dot4" {
+    // -1*4 + 2*5 + 3*6 + (-2)*2 = 20.
+    const lhs = f32x4(-1.0, 2.0, 3.0, -2.0);
+    const rhs = f32x4(4.0, 5.0, 6.0, 2.0);
+    const got = dot4(lhs, rhs);
+    try expect(approxEqAbs(got, splat(F32x4, 20.0), 0.0001));
+}
+
+/// Cross product of the xyz parts of `v0` and `v1`.
+/// The result is masked with `f32x4_mask3`; the tests in this file expect
+/// the w lane of the result to be 0.
+pub inline fn cross3(v0: Vec, v1: Vec) Vec {
+    // Rotated copies of both operands; w stays in place in each of them.
+    const a_yzx = swizzle(v0, .y, .z, .x, .w);
+    const a_zxy = swizzle(v0, .z, .x, .y, .w);
+    const b_zxy = swizzle(v1, .z, .x, .y, .w);
+    const b_yzx = swizzle(v1, .y, .z, .x, .w);
+
+    const positive_terms = a_yzx * b_zxy;
+    const difference = positive_terms - a_zxy * b_yzx;
+    return andInt(difference, f32x4_mask3);
+}
+test "zmath.cross3" {
+    // x cross y = z; the w inputs are 1.0 but the result's w must be 0.
+    {
+        const lhs = f32x4(1.0, 0.0, 0.0, 1.0);
+        const rhs = f32x4(0.0, 1.0, 0.0, 1.0);
+        const got = cross3(lhs, rhs);
+        try expect(approxEqAbs(got, f32x4(0.0, 0.0, 1.0, 0.0), 0.0001));
+    }
+    // x cross -y = -z.
+    {
+        const lhs = f32x4(1.0, 0.0, 0.0, 1.0);
+        const rhs = f32x4(0.0, -1.0, 0.0, 1.0);
+        const got = cross3(lhs, rhs);
+        try expect(approxEqAbs(got, f32x4(0.0, 0.0, -1.0, 0.0), 0.0001));
+    }
+    // General case with no zero components in the result's xyz.
+    {
+        const lhs = f32x4(-3.0, 0, -2.0, 1.0);
+        const rhs = f32x4(5.0, -1.0, 2.0, 1.0);
+        const got = cross3(lhs, rhs);
+        try expect(approxEqAbs(got, f32x4(-2.0, -4.0, 3.0, 0.0), 0.0001));
+    }
+}
+
+/// Squared length of the xy part of `v`: `dot2(v, v)`.
+pub inline fn lengthSq2(v: Vec) F32x4 {
+    const squared = dot2(v, v);
+    return squared;
+}
+/// Squared length of the xyz part of `v`: `dot3(v, v)`.
+pub inline fn lengthSq3(v: Vec) F32x4 {
+    const squared = dot3(v, v);
+    return squared;
+}
+/// Squared length of all four lanes of `v`: `dot4(v, v)`.
+pub inline fn lengthSq4(v: Vec) F32x4 {
+    const squared = dot4(v, v);
+    return squared;
+}
+
+/// Length of the xy part of `v`: square root of `dot2(v, v)`.
+pub inline fn length2(v: Vec) F32x4 {
+    const squared = dot2(v, v);
+    return sqrt(squared);
+}
+/// Length of the xyz part of `v`: square root of `dot3(v, v)`.
+pub inline fn length3(v: Vec) F32x4 {
+    const squared = dot3(v, v);
+    return sqrt(squared);
+}
+/// Length of all four lanes of `v`: square root of `dot4(v, v)`.
+pub inline fn length4(v: Vec) F32x4 {
+    const squared = dot4(v, v);
+    return sqrt(squared);
+}
+test "zmath.length3" {
+    if (builtin.target.os.tag == .macos and builtin.zig_backend != .stage1) return error.SkipZigTest;
+    // 1 + 4 + 9 = 14; the w lane (1000.0) must be ignored.
+    {
+        const got = length3(f32x4(1.0, -2.0, 3.0, 1000.0));
+        try expect(approxEqAbs(got, splat(F32x4, math.sqrt(14.0)), 0.001));
+    }
+    // NaN in y/z makes every lane of the result NaN.
+    {
+        const got = length3(f32x4(1.0, math.nan_f32, math.nan_f32, 1000.0));
+        try expect(all(isNan(got), 0));
+    }
+    // Infinity in y makes every lane of the result infinite.
+    {
+        const got = length3(f32x4(1.0, math.inf_f32, 3.0, 1000.0));
+        try expect(all(isInf(got), 0));
+    }
+    // NaN confined to w does not affect the result.
+    {
+        const got = length3(f32x4(3.0, 2.0, 1.0, math.nan_f32));
+        try expect(approxEqAbs(got, splat(F32x4, math.sqrt(14.0)), 0.001));
+    }
+}
+
+/// Divides `v` by the length of its xy part.
+/// Evaluated as `(v * 1.0) / length`, not `v * (1.0 / length)`.
+pub inline fn normalize2(v: Vec) Vec {
+    const len = sqrt(dot2(v, v));
+    const numerator = v * splat(F32x4, 1.0);
+    return numerator / len;
+}
+/// Divides `v` by the length of its xyz part.
+/// Evaluated as `(v * 1.0) / length`, not `v * (1.0 / length)`.
+pub inline fn normalize3(v: Vec) Vec {
+    const len = sqrt(dot3(v, v));
+    const numerator = v * splat(F32x4, 1.0);
+    return numerator / len;
+}
+/// Divides `v` by its four-lane length.
+/// Evaluated as `(v * 1.0) / length`, not `v * (1.0 / length)`.
+pub inline fn normalize4(v: Vec) Vec {
+    const len = sqrt(dot4(v, v));
+    const numerator = v * splat(F32x4, 1.0);
+    return numerator / len;
+}
+test "zmath.normalize3" {
+    // Every lane, including w, is scaled by 1 / sqrt(1 + 4 + 9).
+    {
+        const input = f32x4(1.0, -2.0, 3.0, 1000.0);
+        const got = normalize3(input);
+        const want = input * splat(F32x4, 1.0 / math.sqrt(14.0));
+        try expect(approxEqAbs(got, want, 0.0005));
+    }
+    // Infinite, NaN and all-zero inputs each produce at least one NaN lane.
+    {
+        try expect(any(isNan(normalize3(f32x4(1.0, math.inf_f32, 1.0, 1.0))), 0));
+        try expect(any(isNan(normalize3(f32x4(-math.inf_f32, math.inf_f32, 0.0, 0.0))), 0));
+        try expect(any(isNan(normalize3(f32x4(-math.nan_f32, math.qnan_f32, 0.0, 0.0))), 0));
+        try expect(any(isNan(normalize3(f32x4(0, 0, 0, 0))), 0));
+    }
+}
+test "zmath.normalize4" {
+    // Every lane is scaled by 1 / sqrt(1 + 4 + 9 + 100).
+    {
+        const input = f32x4(1.0, -2.0, 3.0, 10.0);
+        const got = normalize4(input);
+        const want = input * splat(F32x4, 1.0 / math.sqrt(114.0));
+        try expect(approxEqAbs(got, want, 0.0005));
+    }
+    // Infinite, NaN and all-zero inputs each produce at least one NaN lane.
+    {
+        try expect(any(isNan(normalize4(f32x4(1.0, math.inf_f32, 1.0, 1.0))), 0));
+        try expect(any(isNan(normalize4(f32x4(-math.inf_f32, math.inf_f32, 0.0, 0.0))), 0));
+        try expect(any(isNan(normalize4(f32x4(-math.nan_f32, math.qnan_f32, 0.0, 0.0))), 0));
+        try expect(any(isNan(normalize4(f32x4(0, 0, 0, 0))), 0));
+    }
+}
+
+/// Row vector times matrix: the sum of each row of `m` scaled by the matching
+/// component of `v`, accumulated in row order 0, 1, 2, 3.
+fn vecMulMat(v: Vec, m: Mat) Vec {
+    const x_all = splat(F32x4, v[0]);
+    const y_all = splat(F32x4, v[1]);
+    const z_all = splat(F32x4, v[2]);
+    const w_all = splat(F32x4, v[3]);
+    return x_all * m[0] + y_all * m[1] + z_all * m[2] + w_all * m[3];
+}
+/// Matrix times column vector: lane `i` of the result is `dot4(m[i], v)`.
+fn matMulVec(m: Mat, v: Vec) Vec {
+    const out_x = dot4(m[0], v)[0];
+    const out_y = dot4(m[1], v)[0];
+    const out_z = dot4(m[2], v)[0];
+    const out_w = dot4(m[3], v)[0];
+    return .{ out_x, out_y, out_z, out_w };
+}
+test "zmath.vecMulMat" {
+    // Identity with (2, 3, 4) stored in the last row.
+    const matrix = Mat{
+        f32x4(1.0, 0.0, 0.0, 0.0),
+        f32x4(0.0, 1.0, 0.0, 0.0),
+        f32x4(0.0, 0.0, 1.0, 0.0),
+        f32x4(2.0, 3.0, 4.0, 1.0),
+    };
+    const point = f32x4(1.0, 2.0, 3.0, 1.0);
+
+    // vector * matrix and transpose(matrix) * vector must agree;
+    // matrix * vector is a different product.
+    const vec_times_mat = mul(point, matrix);
+    const mat_times_vec = mul(matrix, point);
+    const transposed_times_vec = mul(transpose(matrix), point);
+    try expect(approxEqAbs(vec_times_mat, f32x4(3.0, 5.0, 7.0, 1.0), 0.0001));
+    try expect(approxEqAbs(mat_times_vec, f32x4(1.0, 2.0, 3.0, 21.0), 0.0001));
+    try expect(approxEqAbs(transposed_times_vec, f32x4(3.0, 5.0, 7.0, 1.0), 0.0001));
+}
+// ------------------------------------------------------------------------------
+//
+// 4. Matrix functions
+//
+// ------------------------------------------------------------------------------
+/// Returns the 4x4 identity matrix.
+pub fn identity() Mat {
+    return Mat{
+        f32x4(1.0, 0.0, 0.0, 0.0),
+        f32x4(0.0, 1.0, 0.0, 0.0),
+        f32x4(0.0, 0.0, 1.0, 0.0),
+        f32x4(0.0, 0.0, 0.0, 1.0),
+    };
+}
+
+/// Compile-time helper giving the result type of `mul(a, b)`:
+///   Mat * Mat            -> Mat
+///   f32 * Mat, Mat * f32 -> Mat
+///   Vec * Mat, Mat * Vec -> Vec
+/// Any other combination is a compile error. Because this is evaluated as the
+/// return type of `mul`, its message is the one a caller sees for unsupported
+/// operands, so the two type names are separated the same way `mul` does.
+fn mulRetType(comptime Ta: type, comptime Tb: type) type {
+    if (Ta == Mat and Tb == Mat) {
+        return Mat;
+    } else if ((Ta == f32 and Tb == Mat) or (Ta == Mat and Tb == f32)) {
+        return Mat;
+    } else if ((Ta == Vec and Tb == Mat) or (Ta == Mat and Tb == Vec)) {
+        return Vec;
+    }
+    @compileError("zmath.mul() not implemented for types: " ++ @typeName(Ta) ++ ", " ++ @typeName(Tb));
+}
+
+/// Multiplication dispatched on the operand types:
+///   Vec * Mat -> `vecMulMat`
+///   Mat * Vec -> `matMulVec`
+///   Mat * Mat -> `mulMat`
+///   Mat * f32, f32 * Mat -> every row multiplied by the broadcast scalar
+/// The result type comes from `mulRetType`.
+pub fn mul(a: anytype, b: anytype) mulRetType(@TypeOf(a), @TypeOf(b)) {
+    const Lhs = @TypeOf(a);
+    const Rhs = @TypeOf(b);
+    if (Lhs == Vec and Rhs == Mat) {
+        return vecMulMat(a, b);
+    } else if (Lhs == Mat and Rhs == Vec) {
+        return matMulVec(a, b);
+    } else if (Lhs == Mat and Rhs == Mat) {
+        return mulMat(a, b);
+    } else if (Lhs == Mat and Rhs == f32) {
+        const factor = splat(F32x4, b);
+        return Mat{ a[0] * factor, a[1] * factor, a[2] * factor, a[3] * factor };
+    } else if (Lhs == f32 and Rhs == Mat) {
+        const factor = splat(F32x4, a);
+        return Mat{ factor * b[0], factor * b[1], factor * b[2], factor * b[3] };
+    } else {
+        @compileError("zmath.mul() not implemented for types: " ++ @typeName(Lhs) ++ ", " ++ @typeName(Rhs));
+    }
+}
+test "zmath.mul" {
+    // f32 * Mat doubles every element.
+    {
+        const matrix = Mat{
+            f32x4(0.1, 0.2, 0.3, 0.4),
+            f32x4(0.5, 0.6, 0.7, 0.8),
+            f32x4(0.9, 1.0, 1.1, 1.2),
+            f32x4(1.3, 1.4, 1.5, 1.6),
+        };
+        const doubled = mul(@as(f32, 2.0), matrix);
+        try expect(approxEqAbs(doubled[0], f32x4(0.2, 0.4, 0.6, 0.8), 0.0001));
+        try expect(approxEqAbs(doubled[1], f32x4(1.0, 1.2, 1.4, 1.6), 0.0001));
+        try expect(approxEqAbs(doubled[2], f32x4(1.8, 2.0, 2.2, 2.4), 0.0001));
+        try expect(approxEqAbs(doubled[3], f32x4(2.6, 2.8, 3.0, 3.2), 0.0001));
+    }
+}
+
+/// Matrix product `m0 * m1`, built one result row at a time.
+/// Each result row combines the rows of `m1` weighted by the components of the
+/// matching row of `m0`; the four terms are summed as two `mulAdd` pairs
+/// (x with z, y with w) that are then added together.
+fn mulMat(m0: Mat, m1: Mat) Mat {
+    var product: Mat = undefined;
+    inline for (.{ 0, 1, 2, 3 }) |row| {
+        const lhs_row = m0[row];
+        const x_all = swizzle(lhs_row, .x, .x, .x, .x);
+        const y_all = swizzle(lhs_row, .y, .y, .y, .y);
+        const z_all = swizzle(lhs_row, .z, .z, .z, .z);
+        const w_all = swizzle(lhs_row, .w, .w, .w, .w);
+        product[row] = mulAdd(x_all, m1[0], z_all * m1[2]) + mulAdd(y_all, m1[1], w_all * m1[3]);
+    }
+    return product;
+}
+test "zmath.matrix.mul" {
+    // Mat * Mat checked row by row against precomputed values.
+    const lhs = Mat{
+        f32x4(0.1, 0.2, 0.3, 0.4),
+        f32x4(0.5, 0.6, 0.7, 0.8),
+        f32x4(0.9, 1.0, 1.1, 1.2),
+        f32x4(1.3, 1.4, 1.5, 1.6),
+    };
+    const rhs = Mat{
+        f32x4(1.7, 1.8, 1.9, 2.0),
+        f32x4(2.1, 2.2, 2.3, 2.4),
+        f32x4(2.5, 2.6, 2.7, 2.8),
+        f32x4(2.9, 3.0, 3.1, 3.2),
+    };
+    const product = mul(lhs, rhs);
+    try expect(approxEqAbs(product[0], f32x4(2.5, 2.6, 2.7, 2.8), 0.0001));
+    try expect(approxEqAbs(product[1], f32x4(6.18, 6.44, 6.7, 6.96), 0.0001));
+    try expect(approxEqAbs(product[2], f32x4(9.86, 10.28, 10.7, 11.12), 0.0001));
+    try expect(approxEqAbs(product[3], f32x4(13.54, 14.12, 14.7, 15.28), 0.0001));
+}
+
+/// Returns the transpose of `m` using two rounds of two-input shuffles.
+/// In a `@shuffle` mask, non-negative indices pick from the first operand and
+/// `~i` picks lane `i` of the second operand.
+pub fn transpose(m: Mat) Mat {
+    const low_halves = [4]i32{ 0, 1, ~@as(i32, 0), ~@as(i32, 1) };
+    const high_halves = [4]i32{ 2, 3, ~@as(i32, 2), ~@as(i32, 3) };
+    const even_lanes = [4]i32{ 0, 2, ~@as(i32, 0), ~@as(i32, 2) };
+    const odd_lanes = [4]i32{ 1, 3, ~@as(i32, 1), ~@as(i32, 3) };
+
+    // Round 1: gather the xy and zw halves of row pairs (0, 1) and (2, 3).
+    const xy_rows01 = @shuffle(f32, m[0], m[1], low_halves);
+    const zw_rows01 = @shuffle(f32, m[0], m[1], high_halves);
+    const xy_rows23 = @shuffle(f32, m[2], m[3], low_halves);
+    const zw_rows23 = @shuffle(f32, m[2], m[3], high_halves);
+
+    // Round 2: interleave so that each output row holds one input column.
+    return Mat{
+        @shuffle(f32, xy_rows01, xy_rows23, even_lanes),
+        @shuffle(f32, xy_rows01, xy_rows23, odd_lanes),
+        @shuffle(f32, zw_rows01, zw_rows23, even_lanes),
+        @shuffle(f32, zw_rows01, zw_rows23, odd_lanes),
+    };
+}
+test "zmath.matrix.transpose" {
+    // Elements are numbered 1..16 in row order, so each output row must list one input column.
+    const matrix = Mat{
+        f32x4(1.0, 2.0, 3.0, 4.0),
+        f32x4(5.0, 6.0, 7.0, 8.0),
+        f32x4(9.0, 10.0, 11.0, 12.0),
+        f32x4(13.0, 14.0, 15.0, 16.0),
+    };
+    const transposed = transpose(matrix);
+    try expect(approxEqAbs(transposed[0], f32x4(1.0, 5.0, 9.0, 13.0), 0.0001));
+    try expect(approxEqAbs(transposed[1], f32x4(2.0, 6.0, 10.0, 14.0), 0.0001));
+    try expect(approxEqAbs(transposed[2], f32x4(3.0, 7.0, 11.0, 15.0), 0.0001));
+    try expect(approxEqAbs(transposed[3], f32x4(4.0, 8.0, 12.0, 16.0), 0.0001));
+}
+
+/// Builds the rotation matrix about the X axis for `angle`.
+/// `s` and `c` are elements 0 and 1 of `sincos(angle)` (sine and cosine,
+/// assuming the scalar path uses the same order as `sincos32xN`).
+pub fn rotationX(angle: f32) Mat {
+    const sin_cos = sincos(angle);
+    const s = sin_cos[0];
+    const c = sin_cos[1];
+    return Mat{
+        f32x4(1.0, 0.0, 0.0, 0.0),
+        f32x4(0.0, c, s, 0.0),
+        f32x4(0.0, -s, c, 0.0),
+        f32x4(0.0, 0.0, 0.0, 1.0),
+    };
+}
+
+/// Builds the rotation matrix about the Y axis for `angle`.
+/// `s` and `c` are elements 0 and 1 of `sincos(angle)` (sine and cosine,
+/// assuming the scalar path uses the same order as `sincos32xN`).
+pub fn rotationY(angle: f32) Mat {
+    const sin_cos = sincos(angle);
+    const s = sin_cos[0];
+    const c = sin_cos[1];
+    return Mat{
+        f32x4(c, 0.0, -s, 0.0),
+        f32x4(0.0, 1.0, 0.0, 0.0),
+        f32x4(s, 0.0, c, 0.0),
+        f32x4(0.0, 0.0, 0.0, 1.0),
+    };
+}
+
+/// Builds the rotation matrix about the Z axis for `angle`.
+/// `s` and `c` are elements 0 and 1 of `sincos(angle)` (sine and cosine,
+/// assuming the scalar path uses the same order as `sincos32xN`).
+pub fn rotationZ(angle: f32) Mat {
+    const sin_cos = sincos(angle);
+    const s = sin_cos[0];
+    const c = sin_cos[1];
+    return Mat{
+        f32x4(c, s, 0.0, 0.0),
+        f32x4(-s, c, 0.0, 0.0),
+        f32x4(0.0, 0.0, 1.0, 0.0),
+        f32x4(0.0, 0.0, 0.0, 1.0),
+    };
+}
+
+/// Builds a translation matrix: identity in the first three rows, with
+/// `(x, y, z, 1)` as the last row.
+pub fn translation(x: f32, y: f32, z: f32) Mat {
+    const offset_row = f32x4(x, y, z, 1.0);
+    return Mat{
+        f32x4(1.0, 0.0, 0.0, 0.0),
+        f32x4(0.0, 1.0, 0.0, 0.0),
+        f32x4(0.0, 0.0, 1.0, 0.0),
+        offset_row,
+    };
+}
+/// Same as `translation`, taking the offsets from the x, y and z lanes of `v`.
+/// The w lane is ignored.
+pub fn translationV(v: Vec) Mat {
+    const x = v[0];
+    const y = v[1];
+    const z = v[2];
+    return translation(x, y, z);
+}
+
+/// Builds a scaling matrix with `x`, `y`, `z` and 1 on the diagonal.
+pub fn scaling(x: f32, y: f32, z: f32) Mat {
+    const x_row = f32x4(x, 0.0, 0.0, 0.0);
+    const y_row = f32x4(0.0, y, 0.0, 0.0);
+    const z_row = f32x4(0.0, 0.0, z, 0.0);
+    return Mat{ x_row, y_row, z_row, f32x4(0.0, 0.0, 0.0, 1.0) };
+}
+/// Same as `scaling`, taking the factors from the x, y and z lanes of `v`.
+/// The w lane is ignored.
+pub fn scalingV(v: Vec) Mat {
+    const x = v[0];
+    const y = v[1];
+    const z = v[2];
+    return scaling(x, y, z);
+}
+
+/// Builds a left-handed view matrix from an eye position, a view direction
+/// and an up direction.
+/// The basis is: z = normalized `eyedir`, x = normalized `updir` cross z,
+/// y = normalized z cross x. The matrix is assembled with the basis vectors as
+/// rows (negated projection of `eyepos` in the fourth column) and then transposed.
+pub fn lookToLh(eyepos: Vec, eyedir: Vec, updir: Vec) Mat {
+    const z_axis = normalize3(eyedir);
+    const x_axis = normalize3(cross3(updir, z_axis));
+    const y_axis = normalize3(cross3(z_axis, x_axis));
+
+    const eye_along_x = dot3(x_axis, eyepos)[0];
+    const eye_along_y = dot3(y_axis, eyepos)[0];
+    const eye_along_z = dot3(z_axis, eyepos)[0];
+
+    const untransposed = Mat{
+        f32x4(x_axis[0], x_axis[1], x_axis[2], -eye_along_x),
+        f32x4(y_axis[0], y_axis[1], y_axis[2], -eye_along_y),
+        f32x4(z_axis[0], z_axis[1], z_axis[2], -eye_along_z),
+        f32x4(0.0, 0.0, 0.0, 1.0),
+    };
+    return transpose(untransposed);
+}
+/// Right-handed variant of `lookToLh`: the same construction with the view
+/// direction negated.
+pub fn lookToRh(eyepos: Vec, eyedir: Vec, updir: Vec) Mat {
+    const reversed_dir = -eyedir;
+    return lookToLh(eyepos, reversed_dir, updir);
+}
+/// Left-handed view matrix looking from `eyepos` towards `focuspos`.
+pub fn lookAtLh(eyepos: Vec, focuspos: Vec, updir: Vec) Mat {
+    const to_focus = focuspos - eyepos;
+    return lookToLh(eyepos, to_focus, updir);
+}
+/// Right-handed view matrix looking from `eyepos` towards `focuspos`.
+/// Passes the direction from the focus point back to the eye to `lookToLh`.
+pub fn lookAtRh(eyepos: Vec, focuspos: Vec, updir: Vec) Mat {
+    const from_focus = eyepos - focuspos;
+    return lookToLh(eyepos, from_focus, updir);
+}
+test "zmath.matrix.lookToLh" {
+    // Eye at z = -3 looking down +z with +y up: identity rotation, translation of +3 along z.
+    const eye = f32x4(0.0, 0.0, -3.0, 1.0);
+    const direction = f32x4(0.0, 0.0, 1.0, 0.0);
+    const up_dir = f32x4(0.0, 1.0, 0.0, 0.0);
+    const view = lookToLh(eye, direction, up_dir);
+    try expect(approxEqAbs(view[0], f32x4(1.0, 0.0, 0.0, 0.0), 0.001));
+    try expect(approxEqAbs(view[1], f32x4(0.0, 1.0, 0.0, 0.0), 0.001));
+    try expect(approxEqAbs(view[2], f32x4(0.0, 0.0, 1.0, 0.0), 0.001));
+    try expect(approxEqAbs(view[3], f32x4(0.0, 0.0, 3.0, 1.0), 0.001));
+}
+
+/// Builds a left-handed perspective projection matrix.
+/// `fovy` is the vertical field of view (half of it is passed to `sincos`),
+/// `aspect` divides the vertical scale to give the horizontal scale, and
+/// `near`/`far` are the clip distances.
+/// Asserts: 0 < near < far, far not within 0.001 of near, the sine of
+/// `fovy / 2` not within 0.001 of zero, and `aspect` not within 0.01 of zero.
+pub fn perspectiveFovLh(fovy: f32, aspect: f32, near: f32, far: f32) Mat {
+    const half_fov = sincos(0.5 * fovy);
+    const sin_half = half_fov[0];
+    const cos_half = half_fov[1];
+
+    assert(near > 0.0 and far > 0.0 and far > near);
+    assert(!math.approxEqAbs(f32, sin_half, 0.0, 0.001));
+    assert(!math.approxEqAbs(f32, far, near, 0.001));
+    assert(!math.approxEqAbs(f32, aspect, 0.0, 0.01));
+
+    // Cotangent of the half angle gives the vertical scale.
+    const y_scale = cos_half / sin_half;
+    const x_scale = y_scale / aspect;
+    const depth_scale = far / (far - near);
+    return Mat{
+        f32x4(x_scale, 0.0, 0.0, 0.0),
+        f32x4(0.0, y_scale, 0.0, 0.0),
+        f32x4(0.0, 0.0, depth_scale, 1.0),
+        f32x4(0.0, 0.0, -depth_scale * near, 0.0),
+    };
+}
+/// Builds a right-handed perspective projection matrix.
+/// Same inputs and assertions as the left-handed variant; the depth scale
+/// uses `near - far` as denominator and the w column holds -1.
+/// Asserts: 0 < near < far, far not within 0.001 of near, the sine of
+/// `fovy / 2` not within 0.001 of zero, and `aspect` not within 0.01 of zero.
+pub fn perspectiveFovRh(fovy: f32, aspect: f32, near: f32, far: f32) Mat {
+    const half_fov = sincos(0.5 * fovy);
+    const sin_half = half_fov[0];
+    const cos_half = half_fov[1];
+
+    assert(near > 0.0 and far > 0.0 and far > near);
+    assert(!math.approxEqAbs(f32, sin_half, 0.0, 0.001));
+    assert(!math.approxEqAbs(f32, far, near, 0.001));
+    assert(!math.approxEqAbs(f32, aspect, 0.0, 0.01));
+
+    // Cotangent of the half angle gives the vertical scale.
+    const y_scale = cos_half / sin_half;
+    const x_scale = y_scale / aspect;
+    const depth_scale = far / (near - far);
+    return Mat{
+        f32x4(x_scale, 0.0, 0.0, 0.0),
+        f32x4(0.0, y_scale, 0.0, 0.0),
+        f32x4(0.0, 0.0, depth_scale, -1.0),
+        f32x4(0.0, 0.0, depth_scale * near, 0.0),
+    };
+}
+
+/// Builds a left-handed perspective projection matrix.
+/// Produces Z values in [-1.0, 1.0] range (OpenGL defaults).
+/// Asserts: 0 < near < far, far not within 0.001 of near, the sine of
+/// `fovy / 2` not within 0.001 of zero, and `aspect` not within 0.01 of zero.
+pub fn perspectiveFovLhGl(fovy: f32, aspect: f32, near: f32, far: f32) Mat {
+    const half_fov = sincos(0.5 * fovy);
+    const sin_half = half_fov[0];
+    const cos_half = half_fov[1];
+
+    assert(near > 0.0 and far > 0.0 and far > near);
+    assert(!math.approxEqAbs(f32, sin_half, 0.0, 0.001));
+    assert(!math.approxEqAbs(f32, far, near, 0.001));
+    assert(!math.approxEqAbs(f32, aspect, 0.0, 0.01));
+
+    // Cotangent of the half angle gives the vertical scale.
+    const y_scale = cos_half / sin_half;
+    const x_scale = y_scale / aspect;
+    const depth_span = far - near;
+    return Mat{
+        f32x4(x_scale, 0.0, 0.0, 0.0),
+        f32x4(0.0, y_scale, 0.0, 0.0),
+        f32x4(0.0, 0.0, (near + far) / depth_span, 1.0),
+        f32x4(0.0, 0.0, 2.0 * near * far / -depth_span, 0.0),
+    };
+}
+
+/// Builds a right-handed perspective projection matrix.
+/// Produces Z values in [-1.0, 1.0] range (OpenGL defaults).
+/// Asserts: 0 < near < far, far not within 0.001 of near, the sine of
+/// `fovy / 2` not within 0.001 of zero, and `aspect` not within 0.01 of zero.
+pub fn perspectiveFovRhGl(fovy: f32, aspect: f32, near: f32, far: f32) Mat {
+    const half_fov = sincos(0.5 * fovy);
+    const sin_half = half_fov[0];
+    const cos_half = half_fov[1];
+
+    assert(near > 0.0 and far > 0.0 and far > near);
+    assert(!math.approxEqAbs(f32, sin_half, 0.0, 0.001));
+    assert(!math.approxEqAbs(f32, far, near, 0.001));
+    assert(!math.approxEqAbs(f32, aspect, 0.0, 0.01));
+
+    // Cotangent of the half angle gives the vertical scale.
+    const y_scale = cos_half / sin_half;
+    const x_scale = y_scale / aspect;
+    const negative_span = near - far;
+    return Mat{
+        f32x4(x_scale, 0.0, 0.0, 0.0),
+        f32x4(0.0, y_scale, 0.0, 0.0),
+        f32x4(0.0, 0.0, (near + far) / negative_span, -1.0),
+        f32x4(0.0, 0.0, 2.0 * near * far / negative_span, 0.0),
+    };
+}
+
+/// Builds a left-handed orthographic projection for a view volume of width
+/// `w` and height `h`, with clip distances `near` and `far`.
+/// Asserts that `w` and `h` are not within 0.001 of zero and that `far` is
+/// not within 0.001 of `near`.
+pub fn orthographicLh(w: f32, h: f32, near: f32, far: f32) Mat {
+    assert(!math.approxEqAbs(f32, w, 0.0, 0.001));
+    assert(!math.approxEqAbs(f32, h, 0.0, 0.001));
+    assert(!math.approxEqAbs(f32, far, near, 0.001));
+
+    const depth_scale = 1.0 / (far - near);
+    const x_scale = 2.0 / w;
+    const y_scale = 2.0 / h;
+    return Mat{
+        f32x4(x_scale, 0.0, 0.0, 0.0),
+        f32x4(0.0, y_scale, 0.0, 0.0),
+        f32x4(0.0, 0.0, depth_scale, 0.0),
+        f32x4(0.0, 0.0, -depth_scale * near, 1.0),
+    };
+}
+
+/// Builds a right-handed orthographic projection for a view volume of width
+/// `w` and height `h`, with clip distances `near` and `far`.
+/// Asserts that `w` and `h` are not within 0.001 of zero and that `far` is
+/// not within 0.001 of `near`.
+pub fn orthographicRh(w: f32, h: f32, near: f32, far: f32) Mat {
+    assert(!math.approxEqAbs(f32, w, 0.0, 0.001));
+    assert(!math.approxEqAbs(f32, h, 0.0, 0.001));
+    assert(!math.approxEqAbs(f32, far, near, 0.001));
+
+    const depth_scale = 1.0 / (near - far);
+    const x_scale = 2.0 / w;
+    const y_scale = 2.0 / h;
+    return Mat{
+        f32x4(x_scale, 0.0, 0.0, 0.0),
+        f32x4(0.0, y_scale, 0.0, 0.0),
+        f32x4(0.0, 0.0, depth_scale, 0.0),
+        f32x4(0.0, 0.0, depth_scale * near, 1.0),
+    };
+}
+
+/// Builds a left-handed orthographic projection for a view volume of width
+/// `w` and height `h`.
+/// Produces Z values in [-1.0, 1.0] range (OpenGL defaults).
+/// Asserts that `w` and `h` are not within 0.001 of zero and that `far` is
+/// not within 0.001 of `near`.
+pub fn orthographicLhGl(w: f32, h: f32, near: f32, far: f32) Mat {
+    assert(!math.approxEqAbs(f32, w, 0.0, 0.001));
+    assert(!math.approxEqAbs(f32, h, 0.0, 0.001));
+    assert(!math.approxEqAbs(f32, far, near, 0.001));
+
+    const depth_span = far - near;
+    const x_scale = 2.0 / w;
+    const y_scale = 2.0 / h;
+    return Mat{
+        f32x4(x_scale, 0.0, 0.0, 0.0),
+        f32x4(0.0, y_scale, 0.0, 0.0),
+        f32x4(0.0, 0.0, 2.0 / depth_span, 0.0),
+        f32x4(0.0, 0.0, (near + far) / -depth_span, 1.0),
+    };
+}
+
+/// Builds a right-handed orthographic projection for a view volume of width
+/// `w` and height `h`.
+/// Produces Z values in [-1.0, 1.0] range (OpenGL defaults).
+/// Asserts that `w` and `h` are not within 0.001 of zero and that `far` is
+/// not within 0.001 of `near`.
+pub fn orthographicRhGl(w: f32, h: f32, near: f32, far: f32) Mat {
+    assert(!math.approxEqAbs(f32, w, 0.0, 0.001));
+    assert(!math.approxEqAbs(f32, h, 0.0, 0.001));
+    assert(!math.approxEqAbs(f32, far, near, 0.001));
+
+    const negative_span = near - far;
+    const x_scale = 2.0 / w;
+    const y_scale = 2.0 / h;
+    return Mat{
+        f32x4(x_scale, 0.0, 0.0, 0.0),
+        f32x4(0.0, y_scale, 0.0, 0.0),
+        f32x4(0.0, 0.0, 2.0 / negative_span, 0.0),
+        f32x4(0.0, 0.0, (near + far) / negative_span, 1.0),
+    };
+}
+
+/// Builds a left-handed orthographic projection for the view volume bounded by
+/// `left`/`right`, `top`/`bottom` and `near`/`far`.
+/// Only `far` vs `near` is asserted (not within 0.001 of each other);
+/// `right == left` or `top == bottom` is not checked.
+pub fn orthographicOffCenterLh(left: f32, right: f32, top: f32, bottom: f32, near: f32, far: f32) Mat {
+    assert(!math.approxEqAbs(f32, far, near, 0.001));
+
+    const width = right - left;
+    const height = top - bottom;
+    const depth_scale = 1.0 / (far - near);
+    return Mat{
+        f32x4(2.0 / width, 0.0, 0.0, 0.0),
+        f32x4(0.0, 2.0 / height, 0.0, 0.0),
+        f32x4(0.0, 0.0, depth_scale, 0.0),
+        f32x4(-(right + left) / width, -(top + bottom) / height, -depth_scale * near, 1.0),
+    };
+}
+
+/// Builds a right-handed orthographic projection for the view volume bounded
+/// by `left`/`right`, `top`/`bottom` and `near`/`far`.
+/// Only `far` vs `near` is asserted (not within 0.001 of each other);
+/// `right == left` or `top == bottom` is not checked.
+pub fn orthographicOffCenterRh(left: f32, right: f32, top: f32, bottom: f32, near: f32, far: f32) Mat {
+    assert(!math.approxEqAbs(f32, far, near, 0.001));
+
+    const width = right - left;
+    const height = top - bottom;
+    const depth_scale = 1.0 / (near - far);
+    return Mat{
+        f32x4(2.0 / width, 0.0, 0.0, 0.0),
+        f32x4(0.0, 2.0 / height, 0.0, 0.0),
+        f32x4(0.0, 0.0, depth_scale, 0.0),
+        f32x4(-(right + left) / width, -(top + bottom) / height, depth_scale * near, 1.0),
+    };
+}
+
+/// Builds a left-handed orthographic projection for the view volume bounded by
+/// `left`/`right`, `top`/`bottom` and `near`/`far`.
+/// Produces Z values in [-1.0, 1.0] range (OpenGL defaults).
+/// Only `far` vs `near` is asserted (not within 0.001 of each other).
+pub fn orthographicOffCenterLhGl(left: f32, right: f32, top: f32, bottom: f32, near: f32, far: f32) Mat {
+    assert(!math.approxEqAbs(f32, far, near, 0.001));
+
+    const width = right - left;
+    const height = top - bottom;
+    const depth_span = far - near;
+    return Mat{
+        f32x4(2.0 / width, 0.0, 0.0, 0.0),
+        f32x4(0.0, 2.0 / height, 0.0, 0.0),
+        f32x4(0.0, 0.0, 2.0 / depth_span, 0.0),
+        f32x4(-(right + left) / width, -(top + bottom) / height, (near + far) / -depth_span, 1.0),
+    };
+}
+
+/// Builds a right-handed orthographic projection for the view volume bounded
+/// by `left`/`right`, `top`/`bottom` and `near`/`far`.
+/// Produces Z values in [-1.0, 1.0] range (OpenGL defaults).
+/// Only `far` vs `near` is asserted (not within 0.001 of each other).
+pub fn orthographicOffCenterRhGl(left: f32, right: f32, top: f32, bottom: f32, near: f32, far: f32) Mat {
+    assert(!math.approxEqAbs(f32, far, near, 0.001));
+
+    const width = right - left;
+    const height = top - bottom;
+    const negative_span = near - far;
+    return Mat{
+        f32x4(2.0 / width, 0.0, 0.0, 0.0),
+        f32x4(0.0, 2.0 / height, 0.0, 0.0),
+        f32x4(0.0, 0.0, 2.0 / negative_span, 0.0),
+        f32x4(-(right + left) / width, -(top + bottom) / height, (near + far) / negative_span, 1.0),
+    };
+}
+
+/// Computes the determinant of `m` and returns it in every lane of the result
+/// (the final step is `dot4`, which broadcasts its sum).
+/// Structure: products of element pairs from rows 2 and 3 are combined into
+/// 2x2 sub-determinants (p0, p1, p2), those are combined with row 1 into one
+/// term per column of row 0, and row 0 with alternating signs is dotted with
+/// those terms.
+pub fn determinant(m: Mat) F32x4 {
+    // First product of each 2x2 sub-determinant taken from rows 2 and 3.
+    var v0 = swizzle(m[2], .y, .x, .x, .x);
+    var v1 = swizzle(m[3], .z, .z, .y, .y);
+    var v2 = swizzle(m[2], .y, .x, .x, .x);
+    var v3 = swizzle(m[3], .w, .w, .w, .z);
+    var v4 = swizzle(m[2], .z, .z, .y, .y);
+    var v5 = swizzle(m[3], .w, .w, .w, .z);
+
+    var p0 = v0 * v1;
+    var p1 = v2 * v3;
+    var p2 = v4 * v5;
+
+    // Second product of each sub-determinant: same lanes with rows 2 and 3 swapped.
+    v0 = swizzle(m[2], .z, .z, .y, .y);
+    v1 = swizzle(m[3], .y, .x, .x, .x);
+    v2 = swizzle(m[2], .w, .w, .w, .z);
+    v3 = swizzle(m[3], .y, .x, .x, .x);
+    v4 = swizzle(m[2], .w, .w, .w, .z);
+    v5 = swizzle(m[3], .z, .z, .y, .y);
+
+    // Negating the first factor makes each mulAdd subtract the second product
+    // from the first (assuming mulAdd(a, b, c) computes a * b + c).
+    p0 = mulAdd(-v0, v1, p0);
+    p1 = mulAdd(-v2, v3, p1);
+    p2 = mulAdd(-v4, v5, p2);
+
+    // Row 1 elements that multiply the sub-determinants.
+    v0 = swizzle(m[1], .w, .w, .w, .z);
+    v1 = swizzle(m[1], .z, .z, .y, .y);
+    v2 = swizzle(m[1], .y, .x, .x, .x);
+
+    // Row 0 with alternating signs, then r = v0 * p0 - v1 * p1 + v2 * p2.
+    var s = m[0] * f32x4(1.0, -1.0, 1.0, -1.0);
+    var r = v0 * p0;
+    r = mulAdd(-v1, p1, r);
+    r = mulAdd(v2, p2, r);
+    return dot4(s, r);
+}
+test "zmath.matrix.determinant" {
+    // Fixed matrix with a known determinant of 2939, expected in every lane.
+    const matrix = Mat{
+        f32x4(10.0, -9.0, -12.0, 1.0),
+        f32x4(7.0, -12.0, 11.0, 1.0),
+        f32x4(-10.0, 10.0, 3.0, 1.0),
+        f32x4(1.0, 2.0, 3.0, 4.0),
+    };
+    const got = determinant(matrix);
+    try expect(approxEqAbs(got, splat(F32x4, 2939.0), 0.0001));
+}
+
+pub fn inverse(a: anytype) @TypeOf(a) { // compile-time dispatch: Mat and Quat are the only supported argument types
+    const Arg = @TypeOf(a);
+    if (Arg == Mat) {
+        return inverseMat(a);
+    } else if (Arg == Quat) {
+        return inverseQuat(a);
+    } else @compileError("zmath.inverse() not implemented for " ++ @typeName(Arg));
+}
+
+fn inverseMat(m: Mat) Mat { // matrix arm of `inverse`
+    return inverseDet(m, null); // null: the caller does not want the determinant reported
+}
+
+pub fn inverseDet(m: Mat, out_det: ?*F32x4) Mat { // inverse of `m`; also stores the determinant when `out_det` is non-null
+    const mt = transpose(m);
+    var v0: [4]F32x4 = undefined; // scratch operands, refilled before every multiply stage
+    var v1: [4]F32x4 = undefined;
+
+    v0[0] = swizzle(mt[2], .x, .x, .y, .y);
+    v1[0] = swizzle(mt[3], .z, .w, .z, .w);
+    v0[1] = swizzle(mt[0], .x, .x, .y, .y);
+    v1[1] = swizzle(mt[1], .z, .w, .z, .w);
+    v0[2] = @shuffle(f32, mt[2], mt[0], [4]i32{ 0, 2, ~@as(i32, 0), ~@as(i32, 2) });
+    v1[2] = @shuffle(f32, mt[3], mt[1], [4]i32{ 1, 3, ~@as(i32, 1), ~@as(i32, 3) });
+
+    var d0 = v0[0] * v1[0]; // d0..d2: 2x2 sub-determinants, completed by the mulAdd stage below
+    var d1 = v0[1] * v1[1];
+    var d2 = v0[2] * v1[2];
+
+    v0[0] = swizzle(mt[2], .z, .w, .z, .w);
+    v1[0] = swizzle(mt[3], .x, .x, .y, .y);
+    v0[1] = swizzle(mt[0], .z, .w, .z, .w);
+    v1[1] = swizzle(mt[1], .x, .x, .y, .y);
+    v0[2] = @shuffle(f32, mt[2], mt[0], [4]i32{ 1, 3, ~@as(i32, 1), ~@as(i32, 3) });
+    v1[2] = @shuffle(f32, mt[3], mt[1], [4]i32{ 0, 2, ~@as(i32, 0), ~@as(i32, 2) });
+
+    d0 = mulAdd(-v0[0], v1[0], d0);
+    d1 = mulAdd(-v0[1], v1[1], d1);
+    d2 = mulAdd(-v0[2], v1[2], d2);
+
+    v0[0] = swizzle(mt[1], .y, .z, .x, .y);
+    v1[0] = @shuffle(f32, d0, d2, [4]i32{ ~@as(i32, 1), 1, 3, 0 });
+    v0[1] = swizzle(mt[0], .z, .x, .y, .x);
+    v1[1] = @shuffle(f32, d0, d2, [4]i32{ 3, ~@as(i32, 1), 1, 2 });
+    v0[2] = swizzle(mt[3], .y, .z, .x, .y);
+    v1[2] = @shuffle(f32, d1, d2, [4]i32{ ~@as(i32, 3), 1, 3, 0 });
+    v0[3] = swizzle(mt[2], .z, .x, .y, .x);
+    v1[3] = @shuffle(f32, d1, d2, [4]i32{ 3, ~@as(i32, 3), 1, 2 });
+
+    var c0 = v0[0] * v1[0]; // c0/c2/c4/c6: one accumulator per output row
+    var c2 = v0[1] * v1[1];
+    var c4 = v0[2] * v1[2];
+    var c6 = v0[3] * v1[3];
+
+    v0[0] = swizzle(mt[1], .z, .w, .y, .z);
+    v1[0] = @shuffle(f32, d0, d2, [4]i32{ 3, 0, 1, ~@as(i32, 0) });
+    v0[1] = swizzle(mt[0], .w, .z, .w, .y);
+    v1[1] = @shuffle(f32, d0, d2, [4]i32{ 2, 1, ~@as(i32, 0), 0 });
+    v0[2] = swizzle(mt[3], .z, .w, .y, .z);
+    v1[2] = @shuffle(f32, d1, d2, [4]i32{ 3, 0, 1, ~@as(i32, 2) });
+    v0[3] = swizzle(mt[2], .w, .z, .w, .y);
+    v1[3] = @shuffle(f32, d1, d2, [4]i32{ 2, 1, ~@as(i32, 2), 0 });
+
+    c0 = mulAdd(-v0[0], v1[0], c0);
+    c2 = mulAdd(-v0[1], v1[1], c2);
+    c4 = mulAdd(-v0[2], v1[2], c4);
+    c6 = mulAdd(-v0[3], v1[3], c6);
+
+    v0[0] = swizzle(mt[1], .w, .x, .w, .x);
+    v1[0] = @shuffle(f32, d0, d2, [4]i32{ 2, ~@as(i32, 1), ~@as(i32, 0), 2 });
+    v0[1] = swizzle(mt[0], .y, .w, .x, .z);
+    v1[1] = @shuffle(f32, d0, d2, [4]i32{ ~@as(i32, 1), 0, 3, ~@as(i32, 0) });
+    v0[2] = swizzle(mt[3], .w, .x, .w, .x);
+    v1[2] = @shuffle(f32, d1, d2, [4]i32{ 2, ~@as(i32, 3), ~@as(i32, 2), 2 });
+    v0[3] = swizzle(mt[2], .y, .w, .x, .z);
+    v1[3] = @shuffle(f32, d1, d2, [4]i32{ ~@as(i32, 3), 0, 3, ~@as(i32, 2) });
+
+    const c1 = mulAdd(-v0[0], v1[0], c0); // odd accumulators use the opposite sign of the last term
+    const c3 = mulAdd(v0[1], v1[1], c2);
+    const c5 = mulAdd(-v0[2], v1[2], c4);
+    const c7 = mulAdd(v0[3], v1[3], c6);
+
+    c0 = mulAdd(v0[0], v1[0], c0);
+    c2 = mulAdd(-v0[1], v1[1], c2);
+    c4 = mulAdd(v0[2], v1[2], c4);
+    c6 = mulAdd(-v0[3], v1[3], c6);
+
+    var mr = Mat{ // interleave even/odd accumulators lane by lane to form the unscaled result rows
+        f32x4(c0[0], c1[1], c0[2], c1[3]),
+        f32x4(c2[0], c3[1], c2[2], c3[3]),
+        f32x4(c4[0], c5[1], c4[2], c5[3]),
+        f32x4(c6[0], c7[1], c6[2], c7[3]),
+    };
+
+    const det = dot4(mr[0], mt[0]);
+    if (out_det) |det_ptr| { // payload capture replaces the `!= null` test followed by `.?`
+        det_ptr.* = det; // reported before the singularity check, so it is written in that case too
+    }
+
+    if (math.approxEqAbs(f32, det[0], 0.0, math.f32_epsilon)) { // treated as singular: return the all-zero matrix
+        return .{
+            f32x4(0.0, 0.0, 0.0, 0.0),
+            f32x4(0.0, 0.0, 0.0, 0.0),
+            f32x4(0.0, 0.0, 0.0, 0.0),
+            f32x4(0.0, 0.0, 0.0, 0.0),
+        };
+    }
+
+    const scale = splat(F32x4, 1.0) / det; // divide every row by the determinant
+    mr[0] *= scale;
+    mr[1] *= scale;
+    mr[2] *= scale;
+    mr[3] *= scale;
+    return mr;
+}
+test "zmath.matrix.inverse" {
+    const mat = Mat{
+        f32x4(10.0, -9.0, -12.0, 1.0),
+        f32x4(7.0, -12.0, 11.0, 1.0),
+        f32x4(-10.0, 10.0, 3.0, 1.0),
+        f32x4(1.0, 2.0, 3.0, 4.0),
+    };
+    var det_out: F32x4 = undefined; // filled in by inverseDet through the out-pointer
+    const inv = inverseDet(mat, &det_out);
+    try expect(approxEqAbs(det_out, splat(F32x4, 2939.0), 0.0001));
+
+    try expect(approxEqAbs(inv[0], f32x4(-0.170806, -0.13576, -0.349439, 0.164001), 0.0001));
+    try expect(approxEqAbs(inv[1], f32x4(-0.163661, -0.14801, -0.253147, 0.141204), 0.0001));
+    try expect(approxEqAbs(inv[2], f32x4(-0.0871045, 0.00646478, -0.0785982, 0.0398095), 0.0001));
+    try expect(approxEqAbs(inv[3], f32x4(0.18986, 0.103096, 0.272882, 0.10854), 0.0001));
+}
+
+pub fn matFromNormAxisAngle(axis: Vec, angle: f32) Mat { // rotation matrix about `axis`; the axis is not normalized here
+    const sincos_angle = sincos(angle);
+
+    const c2 = splat(F32x4, 1.0 - sincos_angle[1]); // 1 - cos(angle), assuming sincos returns { sin, cos } -- TODO confirm
+    const c1 = splat(F32x4, sincos_angle[1]);
+    const c0 = splat(F32x4, sincos_angle[0]);
+
+    const n0 = swizzle(axis, .y, .z, .x, .w);
+    const n1 = swizzle(axis, .z, .x, .y, .w);
+
+    var v0 = c2 * n0 * n1; // products of two different axis components (yz, zx, xy), scaled by c2
+    const r0 = c2 * axis * axis + c1; // squared-component terms plus c1
+    const r1 = c0 * axis + v0;
+    var r2 = v0 - c0 * axis;
+
+    v0 = andInt(r0, f32x4_mask3); // presumably clears the w lane -- mask is defined elsewhere
+
+    var v1 = @shuffle(f32, r1, r2, [4]i32{ 0, 2, ~@as(i32, 1), ~@as(i32, 2) });
+    v1 = swizzle(v1, .y, .z, .w, .x);
+
+    var v2 = @shuffle(f32, r1, r2, [4]i32{ 1, 1, ~@as(i32, 0), ~@as(i32, 0) });
+    v2 = swizzle(v2, .x, .z, .x, .z);
+
+    r2 = @shuffle(f32, v0, v1, [4]i32{ 0, 3, ~@as(i32, 0), ~@as(i32, 1) });
+    r2 = swizzle(r2, .x, .z, .w, .y);
+
+    var m: Mat = undefined; // every row is assigned below before the matrix is returned
+    m[0] = r2;
+
+    r2 = @shuffle(f32, v0, v1, [4]i32{ 1, 3, ~@as(i32, 2), ~@as(i32, 3) });
+    r2 = swizzle(r2, .z, .x, .w, .y);
+    m[1] = r2;
+
+    v2 = @shuffle(f32, v2, v0, [4]i32{ 0, 1, ~@as(i32, 2), ~@as(i32, 3) });
+    m[2] = v2;
+    m[3] = f32x4(0.0, 0.0, 0.0, 1.0); // last row is fixed at (0, 0, 0, 1)
+    return m;
+}
+pub fn matFromAxisAngle(axis: Vec, angle: f32) Mat {
+    // Reject a degenerate axis (all-zero or all-infinite, per the `all(..., 3)` checks), then normalize it.
+    assert(!all(axis == splat(F32x4, 0.0), 3));
+    assert(!all(isInf(axis), 3));
+    return matFromNormAxisAngle(normalize3(axis), angle);
+}
+test "zmath.matrix.matFromAxisAngle" {
+    { // X axis: must agree with rotationX
+        const got = matFromAxisAngle(f32x4(1.0, 0.0, 0.0, 0.0), math.pi * 0.25);
+        const want = rotationX(math.pi * 0.25);
+        try expect(approxEqAbs(got[0], want[0], 0.001));
+        try expect(approxEqAbs(got[1], want[1], 0.001));
+        try expect(approxEqAbs(got[2], want[2], 0.001));
+        try expect(approxEqAbs(got[3], want[3], 0.001));
+    }
+    { // Y axis: must agree with rotationY
+        const got = matFromAxisAngle(f32x4(0.0, 1.0, 0.0, 0.0), math.pi * 0.125);
+        const want = rotationY(math.pi * 0.125);
+        try expect(approxEqAbs(got[0], want[0], 0.001));
+        try expect(approxEqAbs(got[1], want[1], 0.001));
+        try expect(approxEqAbs(got[2], want[2], 0.001));
+        try expect(approxEqAbs(got[3], want[3], 0.001));
+    }
+    { // Z axis: must agree with rotationZ
+        const got = matFromAxisAngle(f32x4(0.0, 0.0, 1.0, 0.0), math.pi * 0.333);
+        const want = rotationZ(math.pi * 0.333);
+        try expect(approxEqAbs(got[0], want[0], 0.001));
+        try expect(approxEqAbs(got[1], want[1], 0.001));
+        try expect(approxEqAbs(got[2], want[2], 0.001));
+        try expect(approxEqAbs(got[3], want[3], 0.001));
+    }
+}
+
+pub fn matFromQuat(quat: Quat) Mat { // builds a rotation matrix from `quat`; the quaternion is not normalized here
+    const q0 = quat + quat; // 2q; never reassigned, so const
+    var q1 = quat * q0; // 2 * q^2 per lane; reused later as scratch for the output rows
+
+    var v0 = swizzle(q1, .y, .x, .x, .w);
+    v0 = andInt(v0, f32x4_mask3); // presumably clears the w lane -- mask is defined elsewhere
+
+    var v1 = swizzle(q1, .z, .z, .y, .w);
+    v1 = andInt(v1, f32x4_mask3);
+
+    const r0 = (f32x4(1.0, 1.0, 1.0, 0.0) - v0) - v1; // diagonal terms: 1 minus two doubled squares
+
+    v0 = swizzle(quat, .x, .x, .y, .w);
+    v1 = swizzle(q0, .z, .y, .z, .w);
+    v0 = v0 * v1;
+
+    v1 = swizzle(quat, .w, .w, .w, .w);
+    const v2 = swizzle(q0, .y, .z, .x, .w);
+    v1 = v1 * v2;
+
+    const r1 = v0 + v1; // off-diagonal sums
+    const r2 = v0 - v1; // off-diagonal differences
+
+    v0 = @shuffle(f32, r1, r2, [4]i32{ 1, 2, ~@as(i32, 0), ~@as(i32, 1) });
+    v0 = swizzle(v0, .x, .z, .w, .y);
+    v1 = @shuffle(f32, r1, r2, [4]i32{ 0, 0, ~@as(i32, 2), ~@as(i32, 2) });
+    v1 = swizzle(v1, .x, .z, .x, .z);
+
+    q1 = @shuffle(f32, r0, v0, [4]i32{ 0, 3, ~@as(i32, 0), ~@as(i32, 1) });
+    q1 = swizzle(q1, .x, .z, .w, .y);
+
+    var m: Mat = undefined; // every row is assigned below before the matrix is returned
+    m[0] = q1;
+
+    q1 = @shuffle(f32, r0, v0, [4]i32{ 1, 3, ~@as(i32, 2), ~@as(i32, 3) });
+    q1 = swizzle(q1, .z, .x, .w, .y);
+    m[1] = q1;
+
+    q1 = @shuffle(f32, v1, r0, [4]i32{ 0, 1, ~@as(i32, 2), ~@as(i32, 3) });
+    m[2] = q1;
+    m[3] = f32x4(0.0, 0.0, 0.0, 1.0); // last row is fixed at (0, 0, 0, 1)
+    return m;
+}
+test "zmath.matrix.matFromQuat" {
+    { // the quaternion (0, 0, 0, 1) must map to the identity matrix
+        const rot = matFromQuat(f32x4(0.0, 0.0, 0.0, 1.0));
+        try expect(approxEqAbs(rot[0], f32x4(1.0, 0.0, 0.0, 0.0), 0.0001));
+        try expect(approxEqAbs(rot[1], f32x4(0.0, 1.0, 0.0, 0.0), 0.0001));
+        try expect(approxEqAbs(rot[2], f32x4(0.0, 0.0, 1.0, 0.0), 0.0001));
+        try expect(approxEqAbs(rot[3], f32x4(0.0, 0.0, 0.0, 1.0), 0.0001));
+    }
+}
+
+pub fn matFromRollPitchYaw(pitch: f32, yaw: f32, roll: f32) Mat { // scalar convenience form of matFromRollPitchYawV
+    return matFromRollPitchYawV(f32x4(pitch, yaw, roll, 0.0)); // angles are packed as (pitch, yaw, roll, 0)
+}
+pub fn matFromRollPitchYawV(angles: Vec) Mat { // rotation matrix from packed angles, computed via a quaternion
+    return matFromQuat(quatFromRollPitchYawV(angles)); // angles -> quaternion -> matrix
+}
+
+pub fn matToQuat(m: Mat) Quat { // alias of quatFromMat with the "to" naming direction
+    return quatFromMat(m);
+}
+
+pub inline fn loadMat(mem: []const f32) Mat {
+    // Rows come from four consecutive 4-float windows of `mem` (elements 0..16).
+    const row0 = load(mem[0..4], F32x4, 0);
+    const row1 = load(mem[4..8], F32x4, 0);
+    const row2 = load(mem[8..12], F32x4, 0);
+    const row3 = load(mem[12..16], F32x4, 0);
+    return .{ row0, row1, row2, row3 };
+}
+test "zmath.loadMat" {
+    const data = [18]f32{
+        1.0,  2.0,  3.0,  4.0,
+        5.0,  6.0,  7.0,  8.0,
+        9.0,  10.0, 11.0, 12.0,
+        13.0, 14.0, 15.0, 16.0,
+        17.0, 18.0,
+    };
+    const mat = loadMat(data[1..]); // start one element in, so the rows begin at 2.0
+    try expect(approxEqAbs(mat[0], f32x4(2.0, 3.0, 4.0, 5.0), 0.0));
+    try expect(approxEqAbs(mat[1], f32x4(6.0, 7.0, 8.0, 9.0), 0.0));
+    try expect(approxEqAbs(mat[2], f32x4(10.0, 11.0, 12.0, 13.0), 0.0));
+    try expect(approxEqAbs(mat[3], f32x4(14.0, 15.0, 16.0, 17.0), 0.0));
+}
+
+pub inline fn storeMat(mem: []f32, m: Mat) void { // writes the four rows of `m` into mem[0..16], row after row
+    store(mem[0..4], m[0], 0);
+    store(mem[4..8], m[1], 0);
+    store(mem[8..12], m[2], 0);
+    store(mem[12..16], m[3], 0);
+}
+
+pub inline fn loadMat43(mem: []const f32) Mat {
+    // Four rows of three floats each (mem[0..12]); the w lanes are filled in as 0, 0, 0, 1.
+    const row0 = f32x4(mem[0], mem[1], mem[2], 0.0);
+    const row1 = f32x4(mem[3], mem[4], mem[5], 0.0);
+    const row2 = f32x4(mem[6], mem[7], mem[8], 0.0);
+    const row3 = f32x4(mem[9], mem[10], mem[11], 1.0);
+    return .{ row0, row1, row2, row3 };
+}
+
+pub inline fn storeMat43(mem: []f32, m: Mat) void { // writes each row of `m` into a 3-float window of mem[0..12]
+    store(mem[0..3], m[0], 3); // last argument is presumably the component count to store -- TODO confirm
+    store(mem[3..6], m[1], 3);
+    store(mem[6..9], m[2], 3);
+    store(mem[9..12], m[3], 3);
+}
+
+pub inline fn loadMat34(mem: []const f32) Mat {
+    // Three rows of four floats are read from mem[0..12]; the fourth row is the constant (0, 0, 0, 1).
+    const row0 = load(mem[0..4], F32x4, 0);
+    const row1 = load(mem[4..8], F32x4, 0);
+    const row2 = load(mem[8..12], F32x4, 0);
+    const row3 = f32x4(0.0, 0.0, 0.0, 1.0);
+    return .{ row0, row1, row2, row3 };
+}
+
+pub inline fn storeMat34(mem: []f32, m: Mat) void { // writes rows 0..2 of `m` into mem[0..12]; row 3 is not stored
+    store(mem[0..4], m[0], 0);
+    store(mem[4..8], m[1], 0);
+    store(mem[8..12], m[2], 0);
+}
+
+pub inline fn matToArr(m: Mat) [16]f32 {
+    var out: [16]f32 = undefined; // fully overwritten by storeMat below
+    storeMat(&out, m);
+    return out;
+}
+
+pub inline fn matToArr43(m: Mat) [12]f32 {
+    var out: [12]f32 = undefined; // fully overwritten by storeMat43 below
+    storeMat43(&out, m);
+    return out;
+}
+
+pub inline fn matToArr34(m: Mat) [12]f32 {
+    var out: [12]f32 = undefined; // fully overwritten by storeMat34 below
+    storeMat34(&out, m);
+    return out;
+}
+// ------------------------------------------------------------------------------
+//
+// 5. Quaternion functions
+//
+// ------------------------------------------------------------------------------
+pub fn qmul(q0: Quat, q1: Quat) Quat { // quaternion product of q0 and q1 (operand order matters)
+    var result = swizzle(q1, .w, .w, .w, .w); // each component of q1 is broadcast to all lanes
+    var q1x = swizzle(q1, .x, .x, .x, .x);
+    var q1y = swizzle(q1, .y, .y, .y, .y);
+    var q1z = swizzle(q1, .z, .z, .z, .z);
+    result = result * q0; // q1.w * q0
+    var q0_shuf = swizzle(q0, .w, .z, .y, .x); // q0 is re-permuted before each of the three remaining terms
+    q1x = q1x * q0_shuf;
+    q0_shuf = swizzle(q0_shuf, .y, .x, .w, .z);
+    result = mulAdd(q1x, f32x4(1.0, -1.0, 1.0, -1.0), result); // sign vectors select add/subtract per lane
+    q1y = q1y * q0_shuf;
+    q0_shuf = swizzle(q0_shuf, .w, .z, .y, .x);
+    q1y = q1y * f32x4(1.0, 1.0, -1.0, -1.0);
+    q1z = q1z * q0_shuf;
+    q1y = mulAdd(q1z, f32x4(-1.0, 1.0, 1.0, -1.0), q1y);
+    return result + q1y;
+}
+test "zmath.quaternion.mul" {
+    { // fixed operands with a hand-checked product
+        const lhs = f32x4(2.0, 3.0, 4.0, 1.0);
+        const rhs = f32x4(3.0, 2.0, 1.0, 4.0);
+        try expect(approxEqAbs(qmul(lhs, rhs), f32x4(16.0, 4.0, 22.0, -12.0), 0.0001));
+    }
+}
+
+pub fn quatToMat(quat: Quat) Mat { // alias of matFromQuat with the "to" naming direction
+    return matFromQuat(quat);
+}
+
+pub fn quatToAxisAngle(quat: Quat, axis: *Vec, angle: *f32) void { // both results are written through the out-pointers
+    axis.* = quat; // copied verbatim (all four lanes, w included); no normalization is performed here
+    angle.* = 2.0 * acos(quat[3]); // angle is twice the arccosine of the w lane
+}
+test "zmath.quaternion.quatToAxisAngle" {
+    {
+        const q = quatFromNormAxisAngle(f32x4(1.0, 0.0, 0.0, 0.0), 0.25 * math.pi);
+        var out_axis: Vec = f32x4(4.0, 3.0, 2.0, 1.0); // junk start values prove both outputs get overwritten
+        var out_angle: f32 = 10.0;
+        quatToAxisAngle(q, &out_axis, &out_angle);
+        try expect(math.approxEqAbs(f32, out_axis[0], @sin(@as(f32, 0.25) * math.pi * 0.5), 0.0001));
+        try expect(out_axis[1] == 0.0);
+        try expect(out_axis[2] == 0.0);
+        try expect(math.approxEqAbs(f32, out_angle, 0.25 * math.pi, 0.0001));
+    }
+}
+
+pub fn quatFromMat(m: Mat) Quat { // quaternion from the upper three rows of `m`; m[3] is never read
+    const r0 = m[0];
+    const r1 = m[1];
+    const r2 = m[2];
+    const r00 = swizzle(r0, .x, .x, .x, .x); // the three diagonal elements, each broadcast to all lanes
+    const r11 = swizzle(r1, .y, .y, .y, .y);
+    const r22 = swizzle(r2, .z, .z, .z, .z);
+
+    const x2gey2 = (r11 - r00) <= splat(F32x4, 0.0); // lane masks; names suggest x^2 >= y^2 etc. -- TODO confirm
+    const z2gew2 = (r11 + r00) <= splat(F32x4, 0.0);
+    const x2py2gez2pw2 = r22 <= splat(F32x4, 0.0);
+
+    var t0 = mulAdd(r00, f32x4(1.0, -1.0, -1.0, 1.0), splat(F32x4, 1.0));
+    var t1 = r11 * f32x4(-1.0, 1.0, -1.0, 1.0);
+    var t2 = mulAdd(r22, f32x4(-1.0, -1.0, 1.0, 1.0), t0);
+    const x2y2z2w2 = t1 + t2; // 1 +/- r00 +/- r11 +/- r22 with a different sign pattern per lane
+
+    t0 = @shuffle(f32, r0, r1, [4]i32{ 1, 2, ~@as(i32, 2), ~@as(i32, 1) });
+    t1 = @shuffle(f32, r1, r2, [4]i32{ 0, 0, ~@as(i32, 0), ~@as(i32, 1) });
+    t1 = swizzle(t1, .x, .z, .w, .y);
+    const xyxzyz = t0 + t1; // sums of paired off-diagonal elements
+
+    t0 = @shuffle(f32, r2, r1, [4]i32{ 1, 0, ~@as(i32, 0), ~@as(i32, 0) });
+    t1 = @shuffle(f32, r1, r0, [4]i32{ 2, 2, ~@as(i32, 2), ~@as(i32, 1) });
+    t1 = swizzle(t1, .x, .z, .w, .y);
+    const xwywzw = (t0 - t1) * f32x4(-1.0, 1.0, -1.0, 1.0); // signed differences of paired off-diagonal elements
+
+    t0 = @shuffle(f32, x2y2z2w2, xyxzyz, [4]i32{ 0, 1, ~@as(i32, 0), ~@as(i32, 0) });
+    t1 = @shuffle(f32, x2y2z2w2, xwywzw, [4]i32{ 2, 3, ~@as(i32, 2), ~@as(i32, 0) });
+    t2 = @shuffle(f32, xyxzyz, xwywzw, [4]i32{ 1, 2, ~@as(i32, 0), ~@as(i32, 1) });
+
+    const tensor0 = @shuffle(f32, t0, t2, [4]i32{ 0, 2, ~@as(i32, 0), ~@as(i32, 2) }); // four candidate results
+    const tensor1 = @shuffle(f32, t0, t2, [4]i32{ 2, 1, ~@as(i32, 1), ~@as(i32, 3) });
+    const tensor2 = @shuffle(f32, t2, t1, [4]i32{ 0, 1, ~@as(i32, 0), ~@as(i32, 2) });
+    const tensor3 = @shuffle(f32, t2, t1, [4]i32{ 2, 3, ~@as(i32, 2), ~@as(i32, 1) });
+
+    t0 = select(x2gey2, tensor0, tensor1); // the masks computed above pick one candidate
+    t1 = select(z2gew2, tensor2, tensor3);
+    t2 = select(x2py2gez2pw2, t0, t1);
+
+    return t2 / length4(t2); // divided by its 4-component length before returning
+}
+test "zmath.quatFromMat" {
+    { // rotation about X
+        const expected = quatFromAxisAngle(f32x4(1.0, 0.0, 0.0, 0.0), 0.25 * math.pi);
+        const actual = quatFromMat(rotationX(0.25 * math.pi));
+        try expect(approxEqAbs(expected, actual, 0.0001));
+    }
+    { // arbitrary (non-unit) axis
+        const expected = quatFromAxisAngle(f32x4(1.0, 2.0, 0.5, 0.0), 0.25 * math.pi);
+        const actual = quatFromMat(matFromAxisAngle(f32x4(1.0, 2.0, 0.5, 0.0), 0.25 * math.pi));
+        try expect(approxEqAbs(expected, actual, 0.0001));
+    }
+    { // roll/pitch/yaw round trip through a matrix
+        const expected = quatFromRollPitchYaw(0.1 * math.pi, -0.2 * math.pi, 0.3 * math.pi);
+        const actual = quatFromMat(matFromRollPitchYaw(0.1 * math.pi, -0.2 * math.pi, 0.3 * math.pi));
+        try expect(approxEqAbs(expected, actual, 0.0001));
+    }
+}
+
+pub fn quatFromNormAxisAngle(axis: Vec, angle: f32) Quat { // quaternion for `angle` about `axis`; axis is used as given
+    const n = f32x4(axis[0], axis[1], axis[2], 1.0); // w forced to 1 so the product below leaves sc[1] in w; never mutated, so const
+    const sc = sincos(0.5 * angle); // half-angle pair
+    return n * f32x4(sc[0], sc[0], sc[0], sc[1]); // xyz scaled by sc[0], w = sc[1]
+}
+pub fn quatFromAxisAngle(axis: Vec, angle: f32) Quat {
+    // Reject a degenerate axis (all-zero or all-infinite, per the `all(..., 3)` checks), then normalize it.
+    assert(!all(axis == splat(F32x4, 0.0), 3));
+    assert(!all(isInf(axis), 3));
+    return quatFromNormAxisAngle(normalize3(axis), angle);
+}
+test "zmath.quaternion.quatFromNormAxisAngle" {
+    { // composing two quaternions must match multiplying the corresponding matrices
+        const qx = quatFromAxisAngle(f32x4(1.0, 0.0, 0.0, 0.0), 0.25 * math.pi);
+        const qy = quatFromAxisAngle(f32x4(0.0, 1.0, 0.0, 0.0), 0.125 * math.pi);
+        const mx = rotationX(0.25 * math.pi);
+        const my = rotationY(0.125 * math.pi);
+        const from_quat = quatToMat(qmul(qx, qy));
+        const from_mat = mul(mx, my);
+        try expect(approxEqAbs(from_quat[0], from_mat[0], 0.0001));
+        try expect(approxEqAbs(from_quat[1], from_mat[1], 0.0001));
+        try expect(approxEqAbs(from_quat[2], from_mat[2], 0.0001));
+        try expect(approxEqAbs(from_quat[3], from_mat[3], 0.0001));
+    }
+    { // quaternion path and direct matrix path must agree for a non-unit axis
+        const via_quat = quatToMat(quatFromAxisAngle(f32x4(1.0, 2.0, 0.5, 0.0), 0.25 * math.pi));
+        const direct = matFromAxisAngle(f32x4(1.0, 2.0, 0.5, 0.0), 0.25 * math.pi);
+        try expect(approxEqAbs(via_quat[0], direct[0], 0.0001));
+        try expect(approxEqAbs(via_quat[1], direct[1], 0.0001));
+        try expect(approxEqAbs(via_quat[2], direct[2], 0.0001));
+        try expect(approxEqAbs(via_quat[3], direct[3], 0.0001));
+    }
+}
+
+pub inline fn qidentity() Quat { // the identity quaternion (0, 0, 0, 1)
+    return f32x4(0.0, 0.0, 0.0, 1.0);
+}
+
+pub inline fn conjugate(quat: Quat) Quat { // negates x, y, z and keeps w
+    return f32x4(-1.0, -1.0, -1.0, 1.0) * quat;
+}
+
+fn inverseQuat(quat: Quat) Quat { // conjugate divided by lengthSq4; all zeros when lengthSq4 <= f32 epsilon
+    const len_sq = lengthSq4(quat);
+    const is_degenerate = len_sq <= splat(F32x4, math.f32_epsilon);
+    return select(is_degenerate, splat(F32x4, 0.0), conjugate(quat) / len_sq);
+}
+test "zmath.quaternion.inverseQuat" { // goes through the public `inverse` dispatcher
+    try expect(approxEqAbs(
+        inverse(f32x4(2.0, 3.0, 4.0, 1.0)), // squared length is 4 + 9 + 16 + 1 = 30
+        f32x4(-1.0 / 15.0, -1.0 / 10.0, -2.0 / 15.0, 1.0 / 30.0), // conjugate / 30
+        0.0001,
+    ));
+    try expect(approxEqAbs(inverse(qidentity()), qidentity(), 0.0001)); // identity is its own inverse
+}
+
+pub fn slerp(q0: Quat, q1: Quat, t: f32) Quat { // scalar-t convenience form of slerpV
+    return slerpV(q0, q1, splat(F32x4, t)); // t is broadcast to every lane
+}
+pub fn slerpV(q0: Quat, q1: Quat, t: F32x4) Quat { // spherical interpolation between q0 and q1 with a vector parameter t
+    var cos_omega = dot4(q0, q1);
+    const sign = select(cos_omega < splat(F32x4, 0.0), splat(F32x4, -1.0), splat(F32x4, 1.0)); // -1 when the dot product is negative
+
+    cos_omega = cos_omega * sign; // made non-negative; the same sign is applied to q1 in the final blend
+    const sin_omega = sqrt(splat(F32x4, 1.0) - cos_omega * cos_omega);
+
+    const omega = atan2(sin_omega, cos_omega);
+
+    var v01 = t;
+    v01 = xorInt(andInt(v01, f32x4_mask2), f32x4_sign_mask1); // presumably keeps lanes x/y and negates x -- masks are defined elsewhere
+    v01 = f32x4(1.0, 0.0, 0.0, 0.0) + v01; // presumably yields (1 - t, t, 0, 0) -- TODO confirm
+
+    var s0 = sin(v01 * omega) / sin_omega;
+    s0 = select(cos_omega < splat(F32x4, 1.0 - 0.00001), s0, v01); // nearly parallel inputs: fall back to the linear weights
+
+    const s1 = swizzle(s0, .y, .y, .y, .y); // weight applied to q1; never reassigned, so const
+    s0 = swizzle(s0, .x, .x, .x, .x); // weight applied to q0
+
+    return q0 * s0 + sign * q1 * s1;
+}
+test "zmath.quaternion.slerp" {
+    const start = f32x4(0.0, 0.0, 0.0, 1.0);
+    const end = f32x4(0.5, 0.5, -0.5, 0.5);
+    const halfway = slerp(start, end, 0.5);
+    try expect(approxEqAbs(halfway, f32x4(0.28867513, 0.28867513, -0.28867513, 0.86602540), 0.0001));
+}
+
+pub fn quatFromRollPitchYaw(pitch: f32, yaw: f32, roll: f32) Quat { // scalar convenience form of quatFromRollPitchYawV
+    return quatFromRollPitchYawV(f32x4(pitch, yaw, roll, 0.0)); // angles are packed as (pitch, yaw, roll, 0)
+}
+pub fn quatFromRollPitchYawV(angles: Vec) Quat { // | pitch | yaw | roll | 0 |
+    const sc = sincos(splat(Vec, 0.5) * angles); // half-angle values; sc[0] / sc[1] presumably sin / cos vectors -- TODO confirm
+    const p0 = @shuffle(f32, sc[1], sc[0], [4]i32{ ~@as(i32, 0), 0, 0, 0 }); // pitch terms: lane 0 comes from the other vector
+    const p1 = @shuffle(f32, sc[0], sc[1], [4]i32{ ~@as(i32, 0), 0, 0, 0 });
+    const y0 = @shuffle(f32, sc[1], sc[0], [4]i32{ 1, ~@as(i32, 1), 1, 1 }); // yaw terms: lane 1 comes from the other vector
+    const y1 = @shuffle(f32, sc[0], sc[1], [4]i32{ 1, ~@as(i32, 1), 1, 1 });
+    const r0 = @shuffle(f32, sc[1], sc[0], [4]i32{ 2, 2, ~@as(i32, 2), 2 }); // roll terms: lane 2 comes from the other vector
+    const r1 = @shuffle(f32, sc[0], sc[1], [4]i32{ 2, 2, ~@as(i32, 2), 2 });
+    const q1 = p1 * f32x4(1.0, -1.0, -1.0, 1.0) * y1; // per-lane signs for the second product
+    const q0 = p0 * y0 * r0;
+    return mulAdd(q1, r1, q0); // q1 * r1 + q0
+}
+test "zmath.quaternion.quatFromRollPitchYawV" {
+    { // pitch only: must agree with rotationX
+        const got = quatToMat(quatFromRollPitchYawV(f32x4(0.25 * math.pi, 0.0, 0.0, 0.0)));
+        const want = rotationX(0.25 * math.pi);
+        try expect(approxEqAbs(got[0], want[0], 0.0001));
+        try expect(approxEqAbs(got[1], want[1], 0.0001));
+        try expect(approxEqAbs(got[2], want[2], 0.0001));
+        try expect(approxEqAbs(got[3], want[3], 0.0001));
+    }
+    { // all three angles: must agree with the composed axis rotations
+        const got = quatToMat(quatFromRollPitchYaw(0.1 * math.pi, 0.2 * math.pi, 0.3 * math.pi));
+        const want = mul(
+            rotationZ(0.3 * math.pi),
+            mul(rotationX(0.1 * math.pi), rotationY(0.2 * math.pi)),
+        );
+        try expect(approxEqAbs(got[0], want[0], 0.0001));
+        try expect(approxEqAbs(got[1], want[1], 0.0001));
+        try expect(approxEqAbs(got[2], want[2], 0.0001));
+        try expect(approxEqAbs(got[3], want[3], 0.0001));
+    }
+}
+// ------------------------------------------------------------------------------
+//
+// 6. Color functions
+//
+// ------------------------------------------------------------------------------
+pub fn adjustSaturation(color: F32x4, saturation: f32) F32x4 {
+    const luma = dot3(f32x4(0.2125, 0.7154, 0.0721, 0.0), color);
+    const scaled = mulAdd(color - luma, f32x4s(saturation), luma);
+    // Lanes 0..2 take the adjusted value; lane 3 (alpha) is taken from the input.
+    return select(boolx4(true, true, true, false), scaled, color);
+}
+
+pub fn adjustContrast(color: F32x4, contrast: f32) F32x4 {
+    const scaled = mulAdd(color - f32x4s(0.5), f32x4s(contrast), f32x4s(0.5));
+    // Lanes 0..2 take the adjusted value; lane 3 (alpha) is taken from the input.
+    return select(boolx4(true, true, true, false), scaled, color);
+}
+
+pub fn rgbToHsl(rgb: F32x4) F32x4 {
+    // Lanes of `rgb` are read as (r, g, b, a); the result is (h, s, l, a) with alpha copied through.
+    const r = swizzle(rgb, .x, .x, .x, .x);
+    const g = swizzle(rgb, .y, .y, .y, .y);
+    const b = swizzle(rgb, .z, .z, .z, .z);
+
+    const lo = min(r, min(g, b));
+    const hi = max(r, max(g, b));
+    const sum = lo + hi;
+
+    const l = sum * f32x4s(0.5);
+    const d = hi - lo;
+    const la = select(boolx4(true, true, true, false), l, rgb);
+
+    // No spread between channels: hue and saturation are both reported as 0.
+    if (all(d < f32x4s(math.f32_epsilon), 3)) {
+        return select(boolx4(true, true, false, false), f32x4s(0.0), la);
+    }
+
+    // The saturation denominator depends on which half of the lightness range we are in.
+    const s = if (all(l > f32x4s(0.5), 3))
+        d / (f32x4s(2.0) - sum)
+    else
+        d / sum;
+
+    // Hue sector is chosen by whichever channel equals the maximum.
+    var h = if (all(r == hi, 3))
+        (g - b) / d
+    else if (all(g == hi, 3))
+        f32x4s(2.0) + (b - r) / d
+    else
+        f32x4s(4.0) + (r - g) / d;
+
+    // Scale the sector value by 1/6 and wrap a negative hue up by one.
+    h /= f32x4s(6.0);
+    if (all(h < f32x4s(0.0), 3)) {
+        h += f32x4s(1.0);
+    }
+
+    // Assemble the lanes: h in x, s in y, l in z, alpha in w.
+    const lha = select(boolx4(true, true, false, false), h, la);
+    return select(boolx4(true, false, true, true), lha, s);
+    // (the y lane of `lha` is a placeholder that the final select replaces with s)
+}
+test "zmath.color.rgbToHsl" { // expected vectors are (h, s, l, a); alpha must pass through unchanged
+    try expect(approxEqAbs(rgbToHsl(f32x4(0.2, 0.4, 0.8, 1.0)), f32x4(0.6111, 0.6, 0.5, 1.0), 0.0001));
+    try expect(approxEqAbs(rgbToHsl(f32x4(1.0, 0.0, 0.0, 0.5)), f32x4(0.0, 1.0, 0.5, 0.5), 0.0001)); // pure red
+    try expect(approxEqAbs(rgbToHsl(f32x4(0.0, 1.0, 0.0, 0.25)), f32x4(0.3333, 1.0, 0.5, 0.25), 0.0001)); // pure green
+    try expect(approxEqAbs(rgbToHsl(f32x4(0.0, 0.0, 1.0, 1.0)), f32x4(0.6666, 1.0, 0.5, 1.0), 0.0001)); // pure blue
+    try expect(approxEqAbs(rgbToHsl(f32x4(0.0, 0.0, 0.0, 1.0)), f32x4(0.0, 0.0, 0.0, 1.0), 0.0001)); // black
+    try expect(approxEqAbs(rgbToHsl(f32x4(1.0, 1.0, 1.0, 1.0)), f32x4(0.0, 0.0, 1.0, 1.0), 0.0001)); // white
+}
+
+fn hueToClr(p: F32x4, q: F32x4, h: F32x4) F32x4 {
+    // Piecewise helper: maps a hue value to a result between p and q.
+    var hue = h;
+    // Wrap once by +/- 1 when the hue falls outside [0, 1].
+    if (all(hue < f32x4s(0.0), 3)) {
+        hue += f32x4s(1.0);
+    }
+    if (all(hue > f32x4s(1.0), 3)) {
+        hue -= f32x4s(1.0);
+    }
+
+    if (all(hue < f32x4s(1.0 / 6.0), 3)) {
+        return mulAdd(q - p, f32x4s(6.0) * hue, p);
+    } else if (all(hue < f32x4s(0.5), 3)) {
+        return q;
+    } else if (all(hue < f32x4s(2.0 / 3.0), 3)) {
+        return mulAdd(q - p, f32x4s(6.0) * (f32x4s(2.0 / 3.0) - hue), p);
+    }
+    return p;
+}
+
+pub fn hslToRgb(hsl: F32x4) F32x4 {
+    // Lanes of `hsl` are read as (h, s, l, a); alpha is copied to the output unchanged.
+    const s = swizzle(hsl, .y, .y, .y, .y);
+    const l = swizzle(hsl, .z, .z, .z, .z);
+
+    // (Near-)zero saturation: lightness fills all three colour lanes.
+    if (all(isNearEqual(s, f32x4s(0.0), f32x4s(math.f32_epsilon)), 3)) {
+        return select(boolx4(true, true, true, false), l, hsl);
+    }
+
+    const h = swizzle(hsl, .x, .x, .x, .x);
+    const q = if (all(l < f32x4s(0.5), 3))
+        l * (f32x4s(1.0) + s)
+    else
+        (l + s) - (l * s);
+    const p = f32x4s(2.0) * l - q;
+
+    const third = f32x4s(1.0 / 3.0);
+    const r = hueToClr(p, q, h + third);
+    const g = hueToClr(p, q, h);
+    const b = hueToClr(p, q, h - third);
+
+    const rg = select(boolx4(true, false, false, false), r, g);
+    const ba = select(boolx4(true, true, true, false), b, hsl);
+    return select(boolx4(true, true, false, false), rg, ba);
+}
+test "zmath.color.hslToRgb" { // fixed conversions first, then round trips in both directions
+    try expect(approxEqAbs(f32x4(0.2, 0.4, 0.8, 1.0), hslToRgb(f32x4(0.6111, 0.6, 0.5, 1.0)), 0.0001));
+    try expect(approxEqAbs(f32x4(1.0, 0.0, 0.0, 0.5), hslToRgb(f32x4(0.0, 1.0, 0.5, 0.5)), 0.0001));
+    try expect(approxEqAbs(f32x4(0.0, 1.0, 0.0, 0.25), hslToRgb(f32x4(0.3333, 1.0, 0.5, 0.25)), 0.0005)); // looser: hue input is rounded
+    try expect(approxEqAbs(f32x4(0.0, 0.0, 1.0, 1.0), hslToRgb(f32x4(0.6666, 1.0, 0.5, 1.0)), 0.0005)); // looser: hue input is rounded
+    try expect(approxEqAbs(f32x4(0.0, 0.0, 0.0, 1.0), hslToRgb(f32x4(0.0, 0.0, 0.0, 1.0)), 0.0001));
+    try expect(approxEqAbs(f32x4(1.0, 1.0, 1.0, 1.0), hslToRgb(f32x4(0.0, 0.0, 1.0, 1.0)), 0.0001));
+    try expect(approxEqAbs(hslToRgb(rgbToHsl(f32x4(1.0, 1.0, 1.0, 1.0))), f32x4(1.0, 1.0, 1.0, 1.0), 0.0005));
+    try expect(approxEqAbs(
+        hslToRgb(rgbToHsl(f32x4(0.82198, 0.1839, 0.632, 1.0))), // rgb -> hsl -> rgb
+        f32x4(0.82198, 0.1839, 0.632, 1.0),
+        0.0005,
+    ));
+    try expect(approxEqAbs(
+        rgbToHsl(hslToRgb(f32x4(0.82198, 0.1839, 0.632, 1.0))), // hsl -> rgb -> hsl
+        f32x4(0.82198, 0.1839, 0.632, 1.0),
+        0.0005,
+    ));
+    try expect(approxEqAbs(
+        rgbToHsl(hslToRgb(f32x4(0.1839, 0.82198, 0.632, 1.0))), // hsl -> rgb -> hsl
+        f32x4(0.1839, 0.82198, 0.632, 1.0),
+        0.0005,
+    ));
+    try expect(approxEqAbs(
+        hslToRgb(rgbToHsl(f32x4(0.1839, 0.632, 0.82198, 1.0))), // rgb -> hsl -> rgb
+        f32x4(0.1839, 0.632, 0.82198, 1.0),
+        0.0005,
+    ));
+}
+
+pub fn rgbToHsv(rgb: F32x4) F32x4 {
+    // Lanes of `rgb` are read as (r, g, b, a); the result is (h, s, v, a) with alpha copied through.
+    const r = swizzle(rgb, .x, .x, .x, .x);
+    const g = swizzle(rgb, .y, .y, .y, .y);
+    const b = swizzle(rgb, .z, .z, .z, .z);
+
+    const lo = min(r, min(g, b));
+    const v = max(r, max(g, b));
+    const d = v - lo;
+
+    // Saturation is forced to 0 when the maximum is (nearly) zero, avoiding the division.
+    const s = if (all(isNearEqual(v, f32x4s(0.0), f32x4s(math.f32_epsilon)), 3)) f32x4s(0.0) else d / v;
+
+    // Hue is 0 when the channels have no spread; otherwise the sector is chosen by the
+    // channel that equals the maximum, and the sector value is scaled by 1/6.
+    const h = if (all(d < f32x4s(math.f32_epsilon), 3)) f32x4s(0.0) else blk: {
+        var sector: F32x4 = undefined;
+        if (all(r == v, 3)) {
+            sector = (g - b) / d;
+            if (all(g < b, 3)) sector += f32x4s(6.0);
+        } else if (all(g == v, 3)) {
+            sector = f32x4s(2.0) + (b - r) / d;
+        } else {
+            sector = f32x4s(4.0) + (r - g) / d;
+        }
+        break :blk sector / f32x4s(6.0);
+    };
+
+    const hv = select(boolx4(true, false, false, false), h, v);
+    const hva = select(boolx4(true, true, true, false), hv, rgb);
+    return select(boolx4(true, false, true, true), hva, s);
+}
+test "zmath.color.rgbToHsv" { // expected vectors are (h, s, v, a); alpha must pass through unchanged
+    try expect(approxEqAbs(rgbToHsv(f32x4(0.2, 0.4, 0.8, 1.0)), f32x4(0.6111, 0.75, 0.8, 1.0), 0.0001)); // blue is the maximum
+    try expect(approxEqAbs(rgbToHsv(f32x4(0.4, 0.2, 0.8, 1.0)), f32x4(0.7222, 0.75, 0.8, 1.0), 0.0001));
+    try expect(approxEqAbs(rgbToHsv(f32x4(0.4, 0.8, 0.2, 1.0)), f32x4(0.2777, 0.75, 0.8, 1.0), 0.0001)); // green is the maximum
+    try expect(approxEqAbs(rgbToHsv(f32x4(1.0, 0.0, 0.0, 0.5)), f32x4(0.0, 1.0, 1.0, 0.5), 0.0001)); // pure red
+    try expect(approxEqAbs(rgbToHsv(f32x4(0.0, 1.0, 0.0, 0.25)), f32x4(0.3333, 1.0, 1.0, 0.25), 0.0001)); // pure green
+    try expect(approxEqAbs(rgbToHsv(f32x4(0.0, 0.0, 1.0, 1.0)), f32x4(0.6666, 1.0, 1.0, 1.0), 0.0001)); // pure blue
+    try expect(approxEqAbs(rgbToHsv(f32x4(0.0, 0.0, 0.0, 1.0)), f32x4(0.0, 0.0, 0.0, 1.0), 0.0001)); // black
+    try expect(approxEqAbs(rgbToHsv(f32x4(1.0, 1.0, 1.0, 1.0)), f32x4(0.0, 0.0, 1.0, 1.0), 0.0001)); // white
+}
+
+pub fn hsvToRgb(hsv: F32x4) F32x4 { // lanes of `hsv` are read as (h, s, v, a); alpha is copied through unchanged
+    const h = swizzle(hsv, .x, .x, .x, .x);
+    const s = swizzle(hsv, .y, .y, .y, .y);
+    const v = swizzle(hsv, .z, .z, .z, .z);
+
+    const h6 = h * f32x4s(6.0); // hue scaled so that each whole unit is one of six sectors
+    const i = floor(h6); // sector number (still a float vector)
+    const f = h6 - i; // fractional position inside the sector
+
+    const p = v * (f32x4s(1.0) - s);
+    const q = v * (f32x4s(1.0) - f * s);
+    const t = v * (f32x4s(1.0) - (f32x4s(1.0) - f) * s);
+
+    const ii = @mod(@floatToInt(i32, mod(i, f32x4s(6.0))[0]), 6); // @mod keeps the index in 0..5 even for a negative hue
+    const rgb = switch (ii) { // each arm places v, p and q-or-t into the r, g, b lanes for that sector
+        0 => blk: {
+            const vt = select(boolx4(true, false, false, false), v, t);
+            break :blk select(boolx4(true, true, false, false), vt, p);
+        },
+        1 => blk: {
+            const qv = select(boolx4(true, false, false, false), q, v);
+            break :blk select(boolx4(true, true, false, false), qv, p);
+        },
+        2 => blk: {
+            const pv = select(boolx4(true, false, false, false), p, v);
+            break :blk select(boolx4(true, true, false, false), pv, t);
+        },
+        3 => blk: {
+            const pq = select(boolx4(true, false, false, false), p, q);
+            break :blk select(boolx4(true, true, false, false), pq, v);
+        },
+        4 => blk: {
+            const tp = select(boolx4(true, false, false, false), t, p);
+            break :blk select(boolx4(true, true, false, false), tp, v);
+        },
+        5 => blk: {
+            const vp = select(boolx4(true, false, false, false), v, p);
+            break :blk select(boolx4(true, true, false, false), vp, q);
+        },
+        else => unreachable, // ii is the result of @mod(..., 6), so it is always in 0..5
+    };
+    return select(boolx4(true, true, true, false), rgb, hsv); // restore the input alpha in lane 3
+}
+test "zmath.color.hsvToRgb" { // fixed conversions first, then round trips in both directions
+    const epsilon = 0.0005; // shared tolerance for every comparison in this test
+    try expect(approxEqAbs(f32x4(0.2, 0.4, 0.8, 1.0), hsvToRgb(f32x4(0.6111, 0.75, 0.8, 1.0)), epsilon));
+    try expect(approxEqAbs(f32x4(0.4, 0.2, 0.8, 1.0), hsvToRgb(f32x4(0.7222, 0.75, 0.8, 1.0)), epsilon));
+    try expect(approxEqAbs(f32x4(0.4, 0.8, 0.2, 1.0), hsvToRgb(f32x4(0.2777, 0.75, 0.8, 1.0)), epsilon));
+    try expect(approxEqAbs(f32x4(1.0, 0.0, 0.0, 0.5), hsvToRgb(f32x4(0.0, 1.0, 1.0, 0.5)), epsilon)); // pure red
+    try expect(approxEqAbs(f32x4(0.0, 1.0, 0.0, 0.25), hsvToRgb(f32x4(0.3333, 1.0, 1.0, 0.25)), epsilon)); // pure green
+    try expect(approxEqAbs(f32x4(0.0, 0.0, 1.0, 1.0), hsvToRgb(f32x4(0.6666, 1.0, 1.0, 1.0)), epsilon)); // pure blue
+    try expect(approxEqAbs(f32x4(0.0, 0.0, 0.0, 1.0), hsvToRgb(f32x4(0.0, 0.0, 0.0, 1.0)), epsilon)); // black
+    try expect(approxEqAbs(f32x4(1.0, 1.0, 1.0, 1.0), hsvToRgb(f32x4(0.0, 0.0, 1.0, 1.0)), epsilon)); // white
+    try expect(approxEqAbs(
+        hsvToRgb(rgbToHsv(f32x4(0.1839, 0.632, 0.82198, 1.0))), // rgb -> hsv -> rgb
+        f32x4(0.1839, 0.632, 0.82198, 1.0),
+        epsilon,
+    ));
+    try expect(approxEqAbs(
+        hsvToRgb(rgbToHsv(f32x4(0.82198, 0.1839, 0.632, 1.0))), // rgb -> hsv -> rgb
+        f32x4(0.82198, 0.1839, 0.632, 1.0),
+        epsilon,
+    ));
+    try expect(approxEqAbs(
+        rgbToHsv(hsvToRgb(f32x4(0.82198, 0.1839, 0.632, 1.0))), // hsv -> rgb -> hsv
+        f32x4(0.82198, 0.1839, 0.632, 1.0),
+        epsilon,
+    ));
+    try expect(approxEqAbs(
+        rgbToHsv(hsvToRgb(f32x4(0.1839, 0.82198, 0.632, 1.0))), // hsv -> rgb -> hsv
+        f32x4(0.1839, 0.82198, 0.632, 1.0),
+        epsilon,
+    ));
+}
+
+pub fn rgbToSrgb(rgb: F32x4) F32x4 {
+    const k = struct { // per-lane constants; the w-lane values do not reach the output (alpha is restored at the end)
+        const cutoff = f32x4(0.0031308, 0.0031308, 0.0031308, 1.0);
+        const linear = f32x4(12.92, 12.92, 12.92, 1.0);
+        const scale = f32x4(1.055, 1.055, 1.055, 1.0);
+        const bias = f32x4(0.055, 0.055, 0.055, 1.0);
+        const rgamma = 1.0 / 2.4;
+    };
+    const clamped = saturate(rgb);
+    const linear_seg = clamped * k.linear; // used for lanes below the cutoff
+    const curve_seg = k.scale * f32x4(
+        math.pow(f32, clamped[0], k.rgamma),
+        math.pow(f32, clamped[1], k.rgamma),
+        math.pow(f32, clamped[2], k.rgamma),
+        clamped[3],
+    ) - k.bias; // power-curve segment for the remaining lanes
+    const encoded = select(clamped < k.cutoff, linear_seg, curve_seg);
+    return select(boolx4(true, true, true, false), encoded, rgb);
+}
+test "zmath.color.rgbToSrgb" { // Pins one rgbToSrgb conversion to roughly three decimal places.
+    const epsilon = 0.001; // absolute tolerance for approxEqAbs
+    try expect(approxEqAbs(rgbToSrgb(f32x4(0.2, 0.4, 0.8, 1.0)), f32x4(0.484, 0.665, 0.906, 1.0), epsilon)); // w = 1.0 is expected back unchanged
+}
+
+pub fn srgbToRgb(srgb: F32x4) F32x4 { // Applies a piecewise (linear / power) sRGB decoding curve to lanes 0..2 of srgb.
+    const static = struct {
+        const cutoff = f32x4(0.04045, 0.04045, 0.04045, 1.0); // per-lane threshold compared against v below
+        const rlinear = f32x4(1.0 / 12.92, 1.0 / 12.92, 1.0 / 12.92, 1.0); // reciprocal slope of the linear segment (v0)
+        const scale = f32x4(1.0 / 1.055, 1.0 / 1.055, 1.0 / 1.055, 1.0); // applied to (v + bias) before the power
+        const bias = f32x4(0.055, 0.055, 0.055, 1.0); // added before scaling in the power segment
+        const gamma = 2.4; // exponent handed to math.pow below
+    };
+    var v = saturate(srgb); // NOTE(review): saturate is defined elsewhere; presumably clamps each lane to [0, 1] - confirm
+    const v0 = v * static.rlinear; // linear segment
+    var v1 = static.scale * (v + static.bias); // base of the power segment
+    v1 = f32x4( // raise lanes 0..2 to gamma; lane 3 is carried through without pow
+        math.pow(f32, v1[0], static.gamma),
+        math.pow(f32, v1[1], static.gamma),
+        math.pow(f32, v1[2], static.gamma),
+        v1[3],
+    );
+    v = select(v > static.cutoff, v1, v0); // NOTE(review): presumably v1 where v > cutoff, else v0 - select is defined elsewhere
+    return select(boolx4(true, true, true, false), v, srgb); // presumably lanes 0..2 from v and w from the input srgb
+}
+test "zmath.color.srgbToRgb" { // Pins one srgbToRgb conversion and checks a round trip through rgbToSrgb.
+    const epsilon = 0.0007; // absolute tolerance for approxEqAbs
+    try expect(approxEqAbs(f32x4(0.2, 0.4, 0.8, 1.0), srgbToRgb(f32x4(0.484, 0.665, 0.906, 1.0)), epsilon));
+    try expect(approxEqAbs( // round trip: rgbToSrgb(srgbToRgb(x)) must reproduce x
+        rgbToSrgb(srgbToRgb(f32x4(0.1839, 0.82198, 0.632, 1.0))),
+        f32x4(0.1839, 0.82198, 0.632, 1.0),
+        epsilon,
+    ));
+}
+// ------------------------------------------------------------------------------
+//
+// X. Misc functions
+//
+// ------------------------------------------------------------------------------
+pub fn linePointDistance(linept0: Vec, linept1: Vec, pt: Vec) F32x4 { // Distance from pt to the line through linept0 and linept1 (3-component helpers only).
+    const direction = linept1 - linept0; // vector along the line
+    const to_point = pt - linept0; // vector from the line's first point to pt
+    const t = dot3(to_point, direction) / lengthSq3(direction); // projection factor of to_point onto direction
+    return length3(to_point - direction * t); // length of what remains after removing the projection
+}
+test "zmath.linePointDistance" { // Pins the distance from (1, 1, 1) to the line through (-1, -2, -3) and (1, 2, 3).
+    {
+        const linept0 = f32x4(-1.0, -2.0, -3.0, 1.0);
+        const linept1 = f32x4(1.0, 2.0, 3.0, 1.0);
+        const pt = f32x4(1.0, 1.0, 1.0, 1.0);
+        const v = linePointDistance(linept0, linept1, pt); // never reassigned, so declared const rather than var
+        try expect(approxEqAbs(v, splat(F32x4, 0.654), 0.001)); // every lane is expected to hold the same distance
+    }
+}
+
+fn sin32(v: f32) f32 { // Polynomial sine approximation for a scalar f32 angle (checked against @sin in the sincos32 test).
+    var y = v - math.tau * @round(v * 1.0 / math.tau); // subtract the nearest whole multiple of tau, leaving y around [-pi, pi]
+
+    if (y > 0.5 * math.pi) { // fold into [-pi/2, pi/2]; sin(pi - y) == sin(y)
+        y = math.pi - y;
+    } else if (y < -math.pi * 0.5) { // sin(-pi - y) == sin(y)
+        y = -math.pi - y;
+    }
+    const y2 = y * y;
+
+    // 11-degree minimax approximation
+    var sinv = mulAdd(@as(f32, -2.3889859e-08), y2, 2.7525562e-06); // nested evaluation in y2; NOTE(review): assumes mulAdd(a, b, c) is a * b + c
+    sinv = mulAdd(sinv, y2, -0.00019840874);
+    sinv = mulAdd(sinv, y2, 0.0083333310);
+    sinv = mulAdd(sinv, y2, -0.16666667);
+    return y * mulAdd(sinv, y2, 1.0); // the final multiply by y makes the result odd in y
+}
+fn cos32(v: f32) f32 { // Polynomial cosine approximation for a scalar f32 angle (checked against @cos in the sincos32 test).
+    var y = v - math.tau * @round(v * 1.0 / math.tau); // subtract the nearest whole multiple of tau, leaving y around [-pi, pi]
+
+    const sign = blk: { // fold y into [-pi/2, pi/2] and remember the sign flip: cos(pi - y) == -cos(y)
+        if (y > 0.5 * math.pi) {
+            y = math.pi - y;
+            break :blk @as(f32, -1.0);
+        } else if (y < -math.pi * 0.5) {
+            y = -math.pi - y;
+            break :blk @as(f32, -1.0);
+        } else {
+            break :blk @as(f32, 1.0); // already inside the folded range, no flip
+        }
+    };
+    const y2 = y * y;
+
+    // 10-degree minimax approximation
+    var cosv = mulAdd(@as(f32, -2.6051615e-07), y2, 2.4760495e-05); // nested evaluation in y2; NOTE(review): assumes mulAdd(a, b, c) is a * b + c
+    cosv = mulAdd(cosv, y2, -0.0013888378);
+    cosv = mulAdd(cosv, y2, 0.041666638);
+    cosv = mulAdd(cosv, y2, -0.5);
+    return sign * mulAdd(cosv, y2, 1.0); // reapply the sign recorded during folding
+}
+fn sincos32(v: f32) [2]f32 { // Returns { sine, cosine } approximations of v, sharing one range reduction between both.
+    var y = v - math.tau * @round(v * 1.0 / math.tau); // subtract the nearest whole multiple of tau, leaving y around [-pi, pi]
+
+    const sign = blk: { // fold y into [-pi/2, pi/2]; the sign only affects the cosine result
+        if (y > 0.5 * math.pi) {
+            y = math.pi - y;
+            break :blk @as(f32, -1.0);
+        } else if (y < -math.pi * 0.5) {
+            y = -math.pi - y;
+            break :blk @as(f32, -1.0);
+        } else {
+            break :blk @as(f32, 1.0);
+        }
+    };
+    const y2 = y * y;
+
+    // 11-degree minimax approximation
+    var sinv = mulAdd(@as(f32, -2.3889859e-08), y2, 2.7525562e-06); // same coefficients as sin32
+    sinv = mulAdd(sinv, y2, -0.00019840874);
+    sinv = mulAdd(sinv, y2, 0.0083333310);
+    sinv = mulAdd(sinv, y2, -0.16666667);
+    sinv = y * mulAdd(sinv, y2, 1.0);
+
+    // 10-degree minimax approximation
+    var cosv = mulAdd(@as(f32, -2.6051615e-07), y2, 2.4760495e-05); // same coefficients as cos32
+    cosv = mulAdd(cosv, y2, -0.0013888378);
+    cosv = mulAdd(cosv, y2, 0.041666638);
+    cosv = mulAdd(cosv, y2, -0.5);
+    cosv = sign * mulAdd(cosv, y2, 1.0);
+
+    return .{ sinv, cosv }; // index 0 is the sine, index 1 the cosine
+}
+test "zmath.sincos32" { // Checks NaN results for inf/NaN inputs and accuracy of sin32, cos32, sincos32 against @sin/@cos.
+    const epsilon = 0.0001; // absolute tolerance for the comparisons in the loop below
+
+    try expect(math.isNan(sincos32(math.inf_f32)[0])); // infinite and NaN inputs are expected to produce NaN
+    try expect(math.isNan(sincos32(math.inf_f32)[1]));
+    try expect(math.isNan(sincos32(-math.inf_f32)[0]));
+    try expect(math.isNan(sincos32(-math.inf_f32)[1]));
+    try expect(math.isNan(sincos32(math.nan_f32)[0]));
+    try expect(math.isNan(sincos32(-math.nan_f32)[1]));
+
+    try expect(math.isNan(sin32(math.inf_f32))); // same checks for the single-output variants
+    try expect(math.isNan(cos32(math.inf_f32)));
+    try expect(math.isNan(sin32(-math.inf_f32)));
+    try expect(math.isNan(cos32(-math.inf_f32)));
+    try expect(math.isNan(sin32(math.nan_f32)));
+    try expect(math.isNan(cos32(-math.nan_f32)));
+
+    var f: f32 = -100.0; // sweep start
+    var i: u32 = 0;
+    while (i < 100) : (i += 1) { // 100 samples
+        const sc = sincos32(f);
+        const s0 = sin32(f);
+        const c0 = cos32(f);
+        const s = @sin(f); // reference values from the compiler builtins
+        const c = @cos(f);
+        try expect(math.approxEqAbs(f32, sc[0], s, epsilon));
+        try expect(math.approxEqAbs(f32, sc[1], c, epsilon));
+        try expect(math.approxEqAbs(f32, s0, s, epsilon));
+        try expect(math.approxEqAbs(f32, c0, c, epsilon));
+        f += 0.12345 * @intToFloat(f32, i); // the step grows with i, so samples spread out as the sweep advances
+    }
+}
+
+fn asin32(v: f32) f32 { // Polynomial arcsine approximation for a scalar f32; finite |v| > 1 yields +/- pi/2 because of the clamp.
+    const x = @fabs(v);
+    var omx = 1.0 - x; // "one minus x"
+    if (omx < 0.0) { // clamp so @sqrt never receives a negative value when |v| > 1
+        omx = 0.0;
+    }
+    const root = @sqrt(omx);
+
+    // 7-degree minimax approximation
+    var result = mulAdd(@as(f32, -0.0012624911), x, 0.0066700901); // nested evaluation in x; NOTE(review): assumes mulAdd(a, b, c) is a * b + c
+    result = mulAdd(result, x, -0.0170881256);
+    result = mulAdd(result, x, 0.0308918810);
+    result = mulAdd(result, x, -0.0501743046);
+    result = mulAdd(result, x, 0.0889789874);
+    result = mulAdd(result, x, -0.2145988016);
+    result = root * mulAdd(result, x, 1.5707963050); // sqrt(1 - x) * poly(x); the same term acos32 returns directly for v >= 0
+
+    return if (v >= 0.0) 0.5 * math.pi - result else result - 0.5 * math.pi; // pi/2 - term, mirrored for negative v
+}
+test "zmath.asin32" { // Checks out-of-range, inf/NaN and in-range behavior of asin and asin32 against math.asin.
+    const epsilon = 0.0001; // absolute tolerance for all approximate comparisons below
+
+    try expect(math.approxEqAbs(f32, asin(@as(f32, -1.1)), -0.5 * math.pi, epsilon)); // finite |v| > 1 is expected to give +/- pi/2
+    try expect(math.approxEqAbs(f32, asin(@as(f32, 1.1)), 0.5 * math.pi, epsilon)); // NOTE(review): asin is defined elsewhere; presumably uses asin32 for f32 - confirm
+    try expect(math.approxEqAbs(f32, asin(@as(f32, -1000.1)), -0.5 * math.pi, epsilon));
+    try expect(math.approxEqAbs(f32, asin(@as(f32, 100000.1)), 0.5 * math.pi, epsilon));
+    try expect(math.isNan(asin(math.inf_f32))); // infinite and NaN inputs are expected to produce NaN
+    try expect(math.isNan(asin(-math.inf_f32)));
+    try expect(math.isNan(asin(math.nan_f32)));
+    try expect(math.isNan(asin(-math.nan_f32)));
+
+    try expect(approxEqAbs(asin(splat(F32x8, -100.0)), splat(F32x8, -0.5 * math.pi), epsilon)); // same expectations for vector arguments
+    try expect(approxEqAbs(asin(splat(F32x16, 100.0)), splat(F32x16, 0.5 * math.pi), epsilon));
+    try expect(all(isNan(asin(splat(F32x4, math.inf_f32))), 0) == true);
+    try expect(all(isNan(asin(splat(F32x4, -math.inf_f32))), 0) == true);
+    try expect(all(isNan(asin(splat(F32x4, math.nan_f32))), 0) == true);
+    try expect(all(isNan(asin(splat(F32x4, math.qnan_f32))), 0) == true);
+
+    var f: f32 = -1.0; // sweep start
+    var i: u32 = 0;
+    while (i < 8) : (i += 1) { // 8 samples
+        const r0 = asin32(f);
+        const r1 = math.asin(f); // reference value from std.math
+        const r4 = asin(splat(F32x4, f));
+        const r8 = asin(splat(F32x8, f));
+        const r16 = asin(splat(F32x16, f));
+        try expect(math.approxEqAbs(f32, r0, r1, epsilon));
+        try expect(approxEqAbs(r4, splat(F32x4, r1), epsilon));
+        try expect(approxEqAbs(r8, splat(F32x8, r1), epsilon));
+        try expect(approxEqAbs(r16, splat(F32x16, r1), epsilon));
+        f += 0.09 * @intToFloat(f32, i); // the step grows with i; the first increment is zero
+    }
+}
+
+fn acos32(v: f32) f32 { // Polynomial arccosine approximation for a scalar f32; finite v > 1 yields 0 and finite v < -1 yields pi.
+    const x = @fabs(v);
+    var omx = 1.0 - x; // "one minus x"
+    if (omx < 0.0) { // clamp so @sqrt never receives a negative value when |v| > 1
+        omx = 0.0;
+    }
+    const root = @sqrt(omx);
+
+    // 7-degree minimax approximation
+    var result = mulAdd(@as(f32, -0.0012624911), x, 0.0066700901); // nested evaluation in x; NOTE(review): assumes mulAdd(a, b, c) is a * b + c
+    result = mulAdd(result, x, -0.0170881256);
+    result = mulAdd(result, x, 0.0308918810);
+    result = mulAdd(result, x, -0.0501743046);
+    result = mulAdd(result, x, 0.0889789874);
+    result = mulAdd(result, x, -0.2145988016);
+    result = root * mulAdd(result, x, 1.5707963050); // sqrt(1 - x) * poly(x), computed on |v|
+
+    return if (v >= 0.0) result else math.pi - result; // negative v mirrors the result around pi/2
+}
+test "zmath.acos32" { // Checks out-of-range, inf/NaN and in-range behavior of acos and acos32 against math.acos.
+    const epsilon = 0.1; // NOTE(review): far looser than the 0.0001 used by the asin32 test - confirm this is intentional
+
+    try expect(math.approxEqAbs(f32, acos(@as(f32, -1.1)), math.pi, epsilon)); // finite v < -1 is expected to give pi
+    try expect(math.approxEqAbs(f32, acos(@as(f32, -10000.1)), math.pi, epsilon));
+    try expect(math.approxEqAbs(f32, acos(@as(f32, 1.1)), 0.0, epsilon)); // finite v > 1 is expected to give 0
+    try expect(math.approxEqAbs(f32, acos(@as(f32, 1000.1)), 0.0, epsilon));
+    try expect(math.isNan(acos(math.inf_f32))); // infinite and NaN inputs are expected to produce NaN
+    try expect(math.isNan(acos(-math.inf_f32)));
+    try expect(math.isNan(acos(math.nan_f32)));
+    try expect(math.isNan(acos(-math.nan_f32)));
+
+    try expect(approxEqAbs(acos(splat(F32x8, -100.0)), splat(F32x8, math.pi), epsilon)); // same expectations for vector arguments
+    try expect(approxEqAbs(acos(splat(F32x16, 100.0)), splat(F32x16, 0.0), epsilon));
+    try expect(all(isNan(acos(splat(F32x4, math.inf_f32))), 0) == true);
+    try expect(all(isNan(acos(splat(F32x4, -math.inf_f32))), 0) == true);
+    try expect(all(isNan(acos(splat(F32x4, math.nan_f32))), 0) == true);
+    try expect(all(isNan(acos(splat(F32x4, math.qnan_f32))), 0) == true);
+
+    var f: f32 = -1.0; // sweep start
+    var i: u32 = 0;
+    while (i < 8) : (i += 1) { // 8 samples
+        const r0 = acos32(f);
+        const r1 = math.acos(f); // reference value from std.math
+        const r4 = acos(splat(F32x4, f));
+        const r8 = acos(splat(F32x8, f));
+        const r16 = acos(splat(F32x16, f));
+        try expect(math.approxEqAbs(f32, r0, r1, epsilon));
+        try expect(approxEqAbs(r4, splat(F32x4, r1), epsilon));
+        try expect(approxEqAbs(r8, splat(F32x8, r1), epsilon));
+        try expect(approxEqAbs(r16, splat(F32x16, r1), epsilon));
+        f += 0.09 * @intToFloat(f32, i); // the step grows with i; the first increment is zero
+    }
+}
+
+pub fn modAngle32(in_angle: f32) f32 { // Wraps in_angle (radians) into [-pi, pi] by removing whole turns of 2 * pi.
+    const angle = in_angle + math.pi; // shift so the target interval becomes [0, 2 * pi]
+    var temp: f32 = @fabs(angle); // the modulo works on the magnitude; the sign is restored at the end
+    temp = temp - (2.0 * math.pi * @intToFloat(f32, @floatToInt(i32, temp / (2.0 * math.pi)))); // fix: count whole 2 * pi turns (was temp / math.pi); the quotient must fit in i32
+    temp = temp - math.pi; // undo the initial shift
+    if (angle < 0.0) { // the shifted input was negative, so mirror the result back
+        temp = -temp;
+    }
+    return temp;
+}
+
+pub fn cmulSoa(re0: anytype, im0: anytype, re1: anytype, im1: anytype) [2]@TypeOf(re0, im0, re1, im1) {
+    // Multiplies (re0, im0) by (re1, im1) as complex numbers held in separate re/im operands; returns { re, im }.
+    const real_seed = re0 * re1; // addend for the real part
+    const imag_seed = re0 * im1; // addend for the imaginary part
+    const re = mulAdd(-im0, im1, real_seed); // NOTE(review): re0 * re1 - im0 * im1 if mulAdd(a, b, c) is a * b + c
+    const im = mulAdd(re1, im0, imag_seed); // likewise re1 * im0 + re0 * im1
+    return .{ re, im };
+}
+// ------------------------------------------------------------------------------
+//
+// FFT (implementation based on xdsp.h from DirectXMath)
+//
+// ------------------------------------------------------------------------------
+fn fftButterflyDit4_1(re0: *F32x4, im0: *F32x4) void { // In-place butterfly across the 4 lanes of re0.* / im0.*; lane notes assume swizzle maps .x/.y/.z/.w to lanes 0..3.
+    const re0l = swizzle(re0.*, .x, .x, .y, .y); // lanes 0 and 1, each duplicated
+    const re0h = swizzle(re0.*, .z, .z, .w, .w); // lanes 2 and 3, each duplicated
+
+    const im0l = swizzle(im0.*, .x, .x, .y, .y);
+    const im0h = swizzle(im0.*, .z, .z, .w, .w);
+
+    const re_temp = mulAdd(re0h, f32x4(1.0, -1.0, 1.0, -1.0), re0l); // NOTE(review): (x + z, x - z, y + w, y - w) if mulAdd(a, b, c) is a * b + c
+    const im_temp = mulAdd(im0h, f32x4(1.0, -1.0, 1.0, -1.0), im0l); // same combination for the imaginary lanes
+
+    const re_shuf0 = @shuffle(f32, re_temp, im_temp, [4]i32{ 2, 3, ~@as(i32, 2), ~@as(i32, 3) }); // { re_temp[2], re_temp[3], im_temp[2], im_temp[3] }
+    const re_shuf = swizzle(re_shuf0, .x, .w, .x, .w); // { re_temp[2], im_temp[3], re_temp[2], im_temp[3] }
+    const im_shuf = swizzle(re_shuf0, .z, .y, .z, .y); // { im_temp[2], re_temp[3], im_temp[2], re_temp[3] }
+
+    const re_templ = swizzle(re_temp, .x, .y, .x, .y); // low pair of re_temp repeated
+    const im_templ = swizzle(im_temp, .x, .y, .x, .y); // low pair of im_temp repeated
+
+    re0.* = mulAdd(re_shuf, f32x4(1.0, 1.0, -1.0, -1.0), re_templ); // results are written back through the pointers
+    im0.* = mulAdd(im_shuf, f32x4(1.0, -1.0, -1.0, 1.0), im_templ);
+}
+
+fn fftButterflyDit4_4( // In-place 4-input butterfly over four (re, im) vector pairs, with table factors applied to outputs 1..3.
+    re0: *F32x4,
+    re1: *F32x4,
+    re2: *F32x4,
+    re3: *F32x4,
+    im0: *F32x4,
+    im1: *F32x4,
+    im2: *F32x4,
+    im3: *F32x4,
+    unity_table_re: []const F32x4, // real parts of the factors; read at stride, stride * 2 and stride * 3
+    unity_table_im: []const F32x4, // imaginary parts, read at the same three indices
+    stride: u32,
+    last: bool, // when true, fftButterflyDit4_1 is additionally applied to each of the four outputs
+) void {
+    const re_temp0 = re0.* + re2.*; // sums and differences of inputs 0/2 and 1/3
+    const im_temp0 = im0.* + im2.*;
+
+    const re_temp2 = re1.* + re3.*;
+    const im_temp2 = im1.* + im3.*;
+
+    const re_temp1 = re0.* - re2.*;
+    const im_temp1 = im0.* - im2.*;
+
+    const re_temp3 = re1.* - re3.*;
+    const im_temp3 = im1.* - im3.*;
+
+    var re_temp4 = re_temp0 + re_temp2; // output 0: temp0 + temp2
+    var im_temp4 = im_temp0 + im_temp2;
+
+    var re_temp5 = re_temp1 + im_temp3; // output 1: temp1 - i * temp3 (real part gains im, imaginary part loses re)
+    var im_temp5 = im_temp1 - re_temp3;
+
+    var re_temp6 = re_temp0 - re_temp2; // output 2: temp0 - temp2
+    var im_temp6 = im_temp0 - im_temp2;
+
+    var re_temp7 = re_temp1 - im_temp3; // output 3: temp1 + i * temp3
+    var im_temp7 = im_temp1 + re_temp3;
+
+    { // outputs 1..3 are multiplied (via cmulSoa) by the table entries at stride, 2 * stride, 3 * stride
+        const re_im = cmulSoa(re_temp5, im_temp5, unity_table_re[stride], unity_table_im[stride]);
+        re_temp5 = re_im[0];
+        im_temp5 = re_im[1];
+    }
+    {
+        const re_im = cmulSoa(re_temp6, im_temp6, unity_table_re[stride * 2], unity_table_im[stride * 2]);
+        re_temp6 = re_im[0];
+        im_temp6 = re_im[1];
+    }
+    {
+        const re_im = cmulSoa(re_temp7, im_temp7, unity_table_re[stride * 3], unity_table_im[stride * 3]);
+        re_temp7 = re_im[0];
+        im_temp7 = re_im[1];
+    }
+
+    if (last) { // final stage: also run the within-vector butterfly on every output
+        fftButterflyDit4_1(&re_temp4, &im_temp4);
+        fftButterflyDit4_1(&re_temp5, &im_temp5);
+        fftButterflyDit4_1(&re_temp6, &im_temp6);
+        fftButterflyDit4_1(&re_temp7, &im_temp7);
+    }
+
+    re0.* = re_temp4; // write all four outputs back through the pointers
+    im0.* = im_temp4;
+
+    re1.* = re_temp5;
+    im1.* = im_temp5;
+
+    re2.* = re_temp6;
+    im2.* = im_temp6;
+
+    re3.* = re_temp7;
+    im3.* = im_temp7;
+}
+
+fn fft4(re: []F32x4, im: []F32x4, count: u32) void {
+    // Runs fftButterflyDit4_1 in place on each of the first `count` (re, im) vector pairs.
+    assert(std.math.isPowerOfTwo(count));
+    assert(re.len >= count);
+    assert(im.len >= count);
+
+    for (re[0..count], im[0..count]) |*re_vec, *im_vec| {
+        fftButterflyDit4_1(re_vec, im_vec);
+    }
+}
+test "zmath.fft4" { // Transforms the real sequence 1, 2, 3, 4 with fft4 and checks the unswizzled result.
+    const epsilon = 0.0001; // absolute tolerance for approxEqAbs
+    var re = [_]F32x4{f32x4(1.0, 2.0, 3.0, 4.0)};
+    var im = [_]F32x4{f32x4s(0.0)}; // purely real input
+    fft4(re[0..], im[0..], 1); // count = 1: a single vector pair
+
+    var re_uns: [1]F32x4 = undefined; // separate output buffers; fftUnswizzle asserts input and output differ
+    var im_uns: [1]F32x4 = undefined;
+    fftUnswizzle(re[0..], re_uns[0..]);
+    fftUnswizzle(im[0..], im_uns[0..]);
+
+    try expect(approxEqAbs(re_uns[0], f32x4(10.0, -2.0, -2.0, -2.0), epsilon)); // lane 0 is the sum of the inputs
+    try expect(approxEqAbs(im_uns[0], f32x4(0.0, 2.0, 0.0, -2.0), epsilon));
+}
+
+fn fft8(re: []F32x4, im: []F32x4, count: u32) void { // Runs `count` in-place 8-sample stages, each over two consecutive vectors of re and im.
+    assert(std.math.isPowerOfTwo(count));
+    assert(re.len >= 2 * count); // two vectors (8 samples) per stage
+    assert(im.len >= 2 * count);
+
+    var index: u32 = 0;
+    while (index < count) : (index += 1) {
+        const pre = re[index * 2 ..]; // never reassigned, only written through, so const
+        const pim = im[index * 2 ..];
+
+        var odds_re = @shuffle(f32, pre[0], pre[1], [4]i32{ 1, 3, ~@as(i32, 1), ~@as(i32, 3) }); // { pre[0][1], pre[0][3], pre[1][1], pre[1][3] }
+        var evens_re = @shuffle(f32, pre[0], pre[1], [4]i32{ 0, 2, ~@as(i32, 0), ~@as(i32, 2) }); // { pre[0][0], pre[0][2], pre[1][0], pre[1][2] }
+        var odds_im = @shuffle(f32, pim[0], pim[1], [4]i32{ 1, 3, ~@as(i32, 1), ~@as(i32, 3) });
+        var evens_im = @shuffle(f32, pim[0], pim[1], [4]i32{ 0, 2, ~@as(i32, 0), ~@as(i32, 2) });
+        fftButterflyDit4_1(&odds_re, &odds_im); // 4-lane butterfly on each half (mutates through the pointers)
+        fftButterflyDit4_1(&evens_re, &evens_im);
+
+        { // first output vector: evens + odds * (cos, -sin) of k * pi / 4 for k = 0..3
+            const re_im = cmulSoa(
+                odds_re,
+                odds_im,
+                f32x4(1.0, 0.70710677, 0.0, -0.70710677),
+                f32x4(0.0, -0.70710677, -1.0, -0.70710677),
+            );
+            pre[0] = evens_re + re_im[0];
+            pim[0] = evens_im + re_im[1];
+        }
+        { // second output vector: same factors negated
+            const re_im = cmulSoa(
+                odds_re,
+                odds_im,
+                f32x4(-1.0, -0.70710677, 0.0, 0.70710677),
+                f32x4(0.0, 0.70710677, 1.0, 0.70710677),
+            );
+            pre[1] = evens_re + re_im[0];
+            pim[1] = evens_im + re_im[1];
+        }
+    }
+}
+test "zmath.fft8" { // Transforms the real sequence 1..8 with fft8 and checks the unswizzled result.
+    const epsilon = 0.0001; // absolute tolerance for approxEqAbs
+    var re = [_]F32x4{ f32x4(1.0, 2.0, 3.0, 4.0), f32x4(5.0, 6.0, 7.0, 8.0) };
+    var im = [_]F32x4{ f32x4s(0.0), f32x4s(0.0) }; // purely real input
+    fft8(re[0..], im[0..], 1); // count = 1: a single 8-sample stage
+
+    var re_uns: [2]F32x4 = undefined; // separate output buffers; fftUnswizzle asserts input and output differ
+    var im_uns: [2]F32x4 = undefined;
+    fftUnswizzle(re[0..], re_uns[0..]);
+    fftUnswizzle(im[0..], im_uns[0..]);
+
+    try expect(approxEqAbs(re_uns[0], f32x4(36.0, -4.0, -4.0, -4.0), epsilon)); // lane 0 is the sum of the inputs
+    try expect(approxEqAbs(re_uns[1], f32x4(-4.0, -4.0, -4.0, -4.0), epsilon));
+    try expect(approxEqAbs(im_uns[0], f32x4(0.0, 9.656854, 4.0, 1.656854), epsilon));
+    try expect(approxEqAbs(im_uns[1], f32x4(0.0, -1.656854, -4.0, -9.656854), epsilon)); // mirrors im_uns[0] with opposite sign
+}
+
+fn fft16(re: []F32x4, im: []F32x4, count: u32) void { // Runs `count` in-place 16-sample stages, each over four consecutive vectors of re and im.
+    assert(std.math.isPowerOfTwo(count));
+    assert(re.len >= 4 * count); // four vectors (16 samples) per stage
+    assert(im.len >= 4 * count);
+
+    const static = struct {
+        const unity_table_re = [4]F32x4{ // row j, lane k holds cos(j * k * pi / 8)
+            f32x4(1.0, 1.0, 1.0, 1.0),
+            f32x4(1.0, 0.92387950, 0.70710677, 0.38268343),
+            f32x4(1.0, 0.70710677, -4.3711388e-008, -0.70710677),
+            f32x4(1.0, 0.38268343, -0.70710677, -0.92387950),
+        };
+        const unity_table_im = [4]F32x4{ // row j, lane k holds -sin(j * k * pi / 8)
+            f32x4(-0.0, -0.0, -0.0, -0.0),
+            f32x4(-0.0, -0.38268343, -0.70710677, -0.92387950),
+            f32x4(-0.0, -0.70710677, -1.0, -0.70710677),
+            f32x4(-0.0, -0.92387950, -0.70710677, 0.38268343),
+        };
+    };
+
+    var index: u32 = 0;
+    while (index < count) : (index += 1) {
+        fftButterflyDit4_4( // one call per group of four vectors
+            &re[index * 4],
+            &re[index * 4 + 1],
+            &re[index * 4 + 2],
+            &re[index * 4 + 3],
+            &im[index * 4],
+            &im[index * 4 + 1],
+            &im[index * 4 + 2],
+            &im[index * 4 + 3],
+            static.unity_table_re[0..],
+            static.unity_table_im[0..],
+            1, // stride 1: table rows 1, 2 and 3 are used
+            true, // last = true: fftButterflyDit4_1 is applied to each output as well
+        );
+    }
+}
+test "zmath.fft16" { // Transforms the real sequence 1..16 with fft16 and checks the unswizzled result.
+    const epsilon = 0.0001; // absolute tolerance for approxEqAbs
+    var re = [_]F32x4{
+        f32x4(1.0, 2.0, 3.0, 4.0),
+        f32x4(5.0, 6.0, 7.0, 8.0),
+        f32x4(9.0, 10.0, 11.0, 12.0),
+        f32x4(13.0, 14.0, 15.0, 16.0),
+    };
+    var im = [_]F32x4{ f32x4s(0.0), f32x4s(0.0), f32x4s(0.0), f32x4s(0.0) }; // purely real input
+    fft16(re[0..], im[0..], 1); // count = 1: a single 16-sample stage
+
+    var re_uns: [4]F32x4 = undefined; // separate output buffers; fftUnswizzle asserts input and output differ
+    var im_uns: [4]F32x4 = undefined;
+    fftUnswizzle(re[0..], re_uns[0..]);
+    fftUnswizzle(im[0..], im_uns[0..]);
+
+    try expect(approxEqAbs(re_uns[0], f32x4(136.0, -8.0, -8.0, -8.0), epsilon)); // lane 0 is the sum of the inputs
+    try expect(approxEqAbs(re_uns[1], f32x4(-8.0, -8.0, -8.0, -8.0), epsilon));
+    try expect(approxEqAbs(re_uns[2], f32x4(-8.0, -8.0, -8.0, -8.0), epsilon));
+    try expect(approxEqAbs(re_uns[3], f32x4(-8.0, -8.0, -8.0, -8.0), epsilon));
+    try expect(approxEqAbs(im_uns[0], f32x4(0.0, 40.218716, 19.313708, 11.972846), epsilon));
+    try expect(approxEqAbs(im_uns[1], f32x4(8.0, 5.345429, 3.313708, 1.591299), epsilon));
+    try expect(approxEqAbs(im_uns[2], f32x4(0.0, -1.591299, -3.313708, -5.345429), epsilon)); // second half mirrors the first with opposite sign
+    try expect(approxEqAbs(im_uns[3], f32x4(-8.0, -11.972846, -19.313708, -40.218716), epsilon));
+}
+
+fn fftN(re: []F32x4, im: []F32x4, unity_table: []const F32x4, length: u32, count: u32) void { // One fftButterflyDit4_4 pass over `count` blocks of `length` samples, then the next smaller stage.
+    assert(length > 16);
+    assert(std.math.isPowerOfTwo(length));
+    assert(std.math.isPowerOfTwo(count));
+    assert(re.len >= length * count / 4); // four samples per F32x4
+    assert(re.len == im.len);
+
+    const total = count * length; // samples handled by this pass
+    const total_vectors = total / 4;
+    const stage_vectors = length / 4; // vectors per block
+    const stage_vectors_mask = stage_vectors - 1;
+    const stride = length / 16; // distance, in vectors, between the four butterfly inputs
+    const stride_mask = stride - 1;
+    const stride_inv_mask = ~stride_mask;
+
+    const unity_table_re = unity_table; // never reassigned, so const; real parts start at index 0
+    const unity_table_im = unity_table[length / 4 ..]; // imaginary parts start length / 4 vectors in
+
+    var index: u32 = 0;
+    while (index < total_vectors / 4) : (index += 1) { // each iteration consumes four vectors
+        const n = (index & stride_inv_mask) * 4 + (index & stride_mask); // first vector of this butterfly group
+        fftButterflyDit4_4(
+            &re[n],
+            &re[n + stride],
+            &re[n + stride * 2],
+            &re[n + stride * 3],
+            &im[n],
+            &im[n + stride],
+            &im[n + stride * 2],
+            &im[n + stride * 3],
+            unity_table_re[(n & stage_vectors_mask)..], // table window offset by the position inside the block
+            unity_table_im[(n & stage_vectors_mask)..],
+            stride,
+            false, // not the last stage; the smaller stages below finish the transform
+        );
+    }
+
+    if (length > 16 * 4) { // continue with blocks a quarter of the size and four times as many of them
+        fftN(re, im, unity_table[(length / 2)..], length / 4, count * 4);
+    } else if (length == 16 * 4) {
+        fft16(re, im, count * 4);
+    } else if (length == 8 * 4) {
+        fft8(re, im, count * 4);
+    } else if (length == 4 * 4) { // NOTE(review): cannot be taken while assert(length > 16) above holds
+        fft4(re, im, count * 4);
+    }
+}
+test "zmath.fftN" { // Runs fft on 32, 64 and 128 real samples and compares against precomputed spectra.
+    var unity_table: [128]F32x4 = undefined; // shared storage; each case initialises the prefix it needs
+    const epsilon = 0.0001; // absolute tolerance for all comparisons below
+
+    // 32 samples
+    {
+        var re = [_]F32x4{
+            f32x4(1.0, 2.0, 3.0, 4.0),     f32x4(5.0, 6.0, 7.0, 8.0),
+            f32x4(9.0, 10.0, 11.0, 12.0),  f32x4(13.0, 14.0, 15.0, 16.0),
+            f32x4(17.0, 18.0, 19.0, 20.0), f32x4(21.0, 22.0, 23.0, 24.0),
+            f32x4(25.0, 26.0, 27.0, 28.0), f32x4(29.0, 30.0, 31.0, 32.0),
+        };
+        var im = [_]F32x4{
+            f32x4s(0.0), f32x4s(0.0), f32x4s(0.0), f32x4s(0.0),
+            f32x4s(0.0), f32x4s(0.0), f32x4s(0.0), f32x4s(0.0),
+        };
+
+        fftInitUnityTable(unity_table[0..32]); // table length equals the sample count in every case of this test
+        fft(re[0..], im[0..], unity_table[0..32]); // results are read back from re / im directly, without fftUnswizzle
+
+        try expect(approxEqAbs(re[0], f32x4(528.0, -16.0, -16.0, -16.0), epsilon)); // lane 0 is the sum 1 + ... + 32
+        try expect(approxEqAbs(re[1], f32x4(-16.0, -16.0, -16.0, -16.0), epsilon));
+        try expect(approxEqAbs(re[2], f32x4(-16.0, -16.0, -16.0, -16.0), epsilon));
+        try expect(approxEqAbs(re[3], f32x4(-16.0, -16.0, -16.0, -16.0), epsilon));
+        try expect(approxEqAbs(re[4], f32x4(-16.0, -16.0, -16.0, -16.0), epsilon));
+        try expect(approxEqAbs(re[5], f32x4(-16.0, -16.0, -16.0, -16.0), epsilon));
+        try expect(approxEqAbs(re[6], f32x4(-16.0, -16.0, -16.0, -16.0), epsilon));
+        try expect(approxEqAbs(re[7], f32x4(-16.0, -16.0, -16.0, -16.0), epsilon));
+        try expect(approxEqAbs(im[0], f32x4(0.0, 162.450726, 80.437432, 52.744931), epsilon));
+        try expect(approxEqAbs(im[1], f32x4(38.627417, 29.933895, 23.945692, 19.496056), epsilon));
+        try expect(approxEqAbs(im[2], f32x4(16.0, 13.130861, 10.690858, 8.552178), epsilon));
+        try expect(approxEqAbs(im[3], f32x4(6.627417, 4.853547, 3.182598, 1.575862), epsilon));
+        try expect(approxEqAbs(im[4], f32x4(0.0, -1.575862, -3.182598, -4.853547), epsilon)); // second half mirrors the first with opposite sign
+        try expect(approxEqAbs(im[5], f32x4(-6.627417, -8.552178, -10.690858, -13.130861), epsilon));
+        try expect(approxEqAbs(im[6], f32x4(-16.0, -19.496056, -23.945692, -29.933895), epsilon));
+        try expect(approxEqAbs(im[7], f32x4(-38.627417, -52.744931, -80.437432, -162.450726), epsilon));
+    }
+
+    // 64 samples
+    { // the input is the 32-sample ramp repeated twice
+        var re = [_]F32x4{
+            f32x4(1.0, 2.0, 3.0, 4.0),     f32x4(5.0, 6.0, 7.0, 8.0),
+            f32x4(9.0, 10.0, 11.0, 12.0),  f32x4(13.0, 14.0, 15.0, 16.0),
+            f32x4(17.0, 18.0, 19.0, 20.0), f32x4(21.0, 22.0, 23.0, 24.0),
+            f32x4(25.0, 26.0, 27.0, 28.0), f32x4(29.0, 30.0, 31.0, 32.0),
+            f32x4(1.0, 2.0, 3.0, 4.0),     f32x4(5.0, 6.0, 7.0, 8.0),
+            f32x4(9.0, 10.0, 11.0, 12.0),  f32x4(13.0, 14.0, 15.0, 16.0),
+            f32x4(17.0, 18.0, 19.0, 20.0), f32x4(21.0, 22.0, 23.0, 24.0),
+            f32x4(25.0, 26.0, 27.0, 28.0), f32x4(29.0, 30.0, 31.0, 32.0),
+        };
+        var im = [_]F32x4{
+            f32x4s(0.0), f32x4s(0.0), f32x4s(0.0), f32x4s(0.0),
+            f32x4s(0.0), f32x4s(0.0), f32x4s(0.0), f32x4s(0.0),
+            f32x4s(0.0), f32x4s(0.0), f32x4s(0.0), f32x4s(0.0),
+            f32x4s(0.0), f32x4s(0.0), f32x4s(0.0), f32x4s(0.0),
+        };
+
+        fftInitUnityTable(unity_table[0..64]);
+        fft(re[0..], im[0..], unity_table[0..64]);
+
+        try expect(approxEqAbs(re[0], f32x4(1056.0, 0.0, -32.0, 0.0), epsilon)); // odd-indexed outputs are expected to be zero
+        var i: u32 = 1;
+        while (i < 16) : (i += 1) {
+            try expect(approxEqAbs(re[i], f32x4(-32.0, 0.0, -32.0, 0.0), epsilon));
+        }
+
+        const expected = [_]f32{ // imaginary outputs in flat order; every odd-indexed entry is zero
+            0.0,        0.0,      324.901452,  0.000000, 160.874864,  0.0,      105.489863,  0.000000,
+            77.254834,  0.0,      59.867789,   0.0,      47.891384,   0.0,      38.992113,   0.0,
+            32.000000,  0.000000, 26.261721,   0.000000, 21.381716,   0.000000, 17.104356,   0.000000,
+            13.254834,  0.000000, 9.707094,    0.000000, 6.365196,    0.000000, 3.151725,    0.000000,
+            0.000000,   0.000000, -3.151725,   0.000000, -6.365196,   0.000000, -9.707094,   0.000000,
+            -13.254834, 0.000000, -17.104356,  0.000000, -21.381716,  0.000000, -26.261721,  0.000000,
+            -32.000000, 0.000000, -38.992113,  0.000000, -47.891384,  0.000000, -59.867789,  0.000000,
+            -77.254834, 0.000000, -105.489863, 0.000000, -160.874864, 0.000000, -324.901452, 0.000000,
+        };
+        for (expected, 0..) |e, ie| {
+            try expect(std.math.approxEqAbs(f32, e, im[(ie / 4)][ie % 4], epsilon)); // flat index ie maps to vector ie / 4, lane ie % 4
+        }
+    }
+
+    // 128 samples
+    { // the input is the 32-sample ramp repeated four times
+        var re = [_]F32x4{
+            f32x4(1.0, 2.0, 3.0, 4.0),     f32x4(5.0, 6.0, 7.0, 8.0),
+            f32x4(9.0, 10.0, 11.0, 12.0),  f32x4(13.0, 14.0, 15.0, 16.0),
+            f32x4(17.0, 18.0, 19.0, 20.0), f32x4(21.0, 22.0, 23.0, 24.0),
+            f32x4(25.0, 26.0, 27.0, 28.0), f32x4(29.0, 30.0, 31.0, 32.0),
+            f32x4(1.0, 2.0, 3.0, 4.0),     f32x4(5.0, 6.0, 7.0, 8.0),
+            f32x4(9.0, 10.0, 11.0, 12.0),  f32x4(13.0, 14.0, 15.0, 16.0),
+            f32x4(17.0, 18.0, 19.0, 20.0), f32x4(21.0, 22.0, 23.0, 24.0),
+            f32x4(25.0, 26.0, 27.0, 28.0), f32x4(29.0, 30.0, 31.0, 32.0),
+            f32x4(1.0, 2.0, 3.0, 4.0),     f32x4(5.0, 6.0, 7.0, 8.0),
+            f32x4(9.0, 10.0, 11.0, 12.0),  f32x4(13.0, 14.0, 15.0, 16.0),
+            f32x4(17.0, 18.0, 19.0, 20.0), f32x4(21.0, 22.0, 23.0, 24.0),
+            f32x4(25.0, 26.0, 27.0, 28.0), f32x4(29.0, 30.0, 31.0, 32.0),
+            f32x4(1.0, 2.0, 3.0, 4.0),     f32x4(5.0, 6.0, 7.0, 8.0),
+            f32x4(9.0, 10.0, 11.0, 12.0),  f32x4(13.0, 14.0, 15.0, 16.0),
+            f32x4(17.0, 18.0, 19.0, 20.0), f32x4(21.0, 22.0, 23.0, 24.0),
+            f32x4(25.0, 26.0, 27.0, 28.0), f32x4(29.0, 30.0, 31.0, 32.0),
+        };
+        var im = [_]F32x4{
+            f32x4s(0.0), f32x4s(0.0), f32x4s(0.0), f32x4s(0.0),
+            f32x4s(0.0), f32x4s(0.0), f32x4s(0.0), f32x4s(0.0),
+            f32x4s(0.0), f32x4s(0.0), f32x4s(0.0), f32x4s(0.0),
+            f32x4s(0.0), f32x4s(0.0), f32x4s(0.0), f32x4s(0.0),
+            f32x4s(0.0), f32x4s(0.0), f32x4s(0.0), f32x4s(0.0),
+            f32x4s(0.0), f32x4s(0.0), f32x4s(0.0), f32x4s(0.0),
+            f32x4s(0.0), f32x4s(0.0), f32x4s(0.0), f32x4s(0.0),
+            f32x4s(0.0), f32x4s(0.0), f32x4s(0.0), f32x4s(0.0),
+        };
+
+        fftInitUnityTable(unity_table[0..128]);
+        fft(re[0..], im[0..], unity_table[0..128]);
+
+        try expect(approxEqAbs(re[0], f32x4(2112.0, 0.0, 0.0, 0.0), epsilon)); // only every fourth output is expected to be non-zero
+        var i: u32 = 1;
+        while (i < 32) : (i += 1) {
+            try expect(approxEqAbs(re[i], f32x4(-64.0, 0.0, 0.0, 0.0), epsilon));
+        }
+
+        const expected = [_]f32{ // imaginary outputs in flat order; only every fourth entry is non-zero
+            0.000000,    0.000000, 0.000000, 0.000000, 649.802905,  0.000000, 0.000000, 0.000000,
+            321.749727,  0.000000, 0.000000, 0.000000, 210.979725,  0.000000, 0.000000, 0.000000,
+            154.509668,  0.000000, 0.000000, 0.000000, 119.735578,  0.000000, 0.000000, 0.000000,
+            95.782769,   0.000000, 0.000000, 0.000000, 77.984226,   0.000000, 0.000000, 0.000000,
+            64.000000,   0.000000, 0.000000, 0.000000, 52.523443,   0.000000, 0.000000, 0.000000,
+            42.763433,   0.000000, 0.000000, 0.000000, 34.208713,   0.000000, 0.000000, 0.000000,
+            26.509668,   0.000000, 0.000000, 0.000000, 19.414188,   0.000000, 0.000000, 0.000000,
+            12.730392,   0.000000, 0.000000, 0.000000, 6.303450,    0.000000, 0.000000, 0.000000,
+            0.000000,    0.000000, 0.000000, 0.000000, -6.303450,   0.000000, 0.000000, 0.000000,
+            -12.730392,  0.000000, 0.000000, 0.000000, -19.414188,  0.000000, 0.000000, 0.000000,
+            -26.509668,  0.000000, 0.000000, 0.000000, -34.208713,  0.000000, 0.000000, 0.000000,
+            -42.763433,  0.000000, 0.000000, 0.000000, -52.523443,  0.000000, 0.000000, 0.000000,
+            -64.000000,  0.000000, 0.000000, 0.000000, -77.984226,  0.000000, 0.000000, 0.000000,
+            -95.782769,  0.000000, 0.000000, 0.000000, -119.735578, 0.000000, 0.000000, 0.000000,
+            -154.509668, 0.000000, 0.000000, 0.000000, -210.979725, 0.000000, 0.000000, 0.000000,
+            -321.749727, 0.000000, 0.000000, 0.000000, -649.802905, 0.000000, 0.000000, 0.000000,
+        };
+        for (expected, 0..) |e, ie| {
+            try expect(std.math.approxEqAbs(f32, e, im[(ie / 4)][ie % 4], epsilon)); // flat index ie maps to vector ie / 4, lane ie % 4
+        }
+    }
+}
+
+fn fftUnswizzle(input: []const F32x4, output: []F32x4) void { // Scatters every f32 lane of input to a permuted position in output (not in place).
+    assert(std.math.isPowerOfTwo(input.len));
+    assert(input.len == output.len);
+    assert(input.ptr != output.ptr); // input and output must be distinct buffers
+
+    const log2_length = std.math.log2_int(usize, input.len * 4); // log2 of the number of f32 values
+    assert(log2_length >= 2);
+
+    const length = input.len; // number of vectors
+
+    const f32_output = @ptrCast([*]f32, output.ptr)[0 .. output.len * 4]; // view the output vectors as a flat f32 slice
+
+    const static = struct {
+        const swizzle_table = [256]u8{ // entry i is byte i with its four 2-bit groups in reverse order (e.g. 0x01 -> 0x40)
+            0x00, 0x40, 0x80, 0xC0, 0x10, 0x50, 0x90, 0xD0, 0x20, 0x60, 0xA0, 0xE0, 0x30, 0x70, 0xB0, 0xF0,
+            0x04, 0x44, 0x84, 0xC4, 0x14, 0x54, 0x94, 0xD4, 0x24, 0x64, 0xA4, 0xE4, 0x34, 0x74, 0xB4, 0xF4,
+            0x08, 0x48, 0x88, 0xC8, 0x18, 0x58, 0x98, 0xD8, 0x28, 0x68, 0xA8, 0xE8, 0x38, 0x78, 0xB8, 0xF8,
+            0x0C, 0x4C, 0x8C, 0xCC, 0x1C, 0x5C, 0x9C, 0xDC, 0x2C, 0x6C, 0xAC, 0xEC, 0x3C, 0x7C, 0xBC, 0xFC,
+            0x01, 0x41, 0x81, 0xC1, 0x11, 0x51, 0x91, 0xD1, 0x21, 0x61, 0xA1, 0xE1, 0x31, 0x71, 0xB1, 0xF1,
+            0x05, 0x45, 0x85, 0xC5, 0x15, 0x55, 0x95, 0xD5, 0x25, 0x65, 0xA5, 0xE5, 0x35, 0x75, 0xB5, 0xF5,
+            0x09, 0x49, 0x89, 0xC9, 0x19, 0x59, 0x99, 0xD9, 0x29, 0x69, 0xA9, 0xE9, 0x39, 0x79, 0xB9, 0xF9,
+            0x0D, 0x4D, 0x8D, 0xCD, 0x1D, 0x5D, 0x9D, 0xDD, 0x2D, 0x6D, 0xAD, 0xED, 0x3D, 0x7D, 0xBD, 0xFD,
+            0x02, 0x42, 0x82, 0xC2, 0x12, 0x52, 0x92, 0xD2, 0x22, 0x62, 0xA2, 0xE2, 0x32, 0x72, 0xB2, 0xF2,
+            0x06, 0x46, 0x86, 0xC6, 0x16, 0x56, 0x96, 0xD6, 0x26, 0x66, 0xA6, 0xE6, 0x36, 0x76, 0xB6, 0xF6,
+            0x0A, 0x4A, 0x8A, 0xCA, 0x1A, 0x5A, 0x9A, 0xDA, 0x2A, 0x6A, 0xAA, 0xEA, 0x3A, 0x7A, 0xBA, 0xFA,
+            0x0E, 0x4E, 0x8E, 0xCE, 0x1E, 0x5E, 0x9E, 0xDE, 0x2E, 0x6E, 0xAE, 0xEE, 0x3E, 0x7E, 0xBE, 0xFE,
+            0x03, 0x43, 0x83, 0xC3, 0x13, 0x53, 0x93, 0xD3, 0x23, 0x63, 0xA3, 0xE3, 0x33, 0x73, 0xB3, 0xF3,
+            0x07, 0x47, 0x87, 0xC7, 0x17, 0x57, 0x97, 0xD7, 0x27, 0x67, 0xA7, 0xE7, 0x37, 0x77, 0xB7, 0xF7,
+            0x0B, 0x4B, 0x8B, 0xCB, 0x1B, 0x5B, 0x9B, 0xDB, 0x2B, 0x6B, 0xAB, 0xEB, 0x3B, 0x7B, 0xBB, 0xFB,
+            0x0F, 0x4F, 0x8F, 0xCF, 0x1F, 0x5F, 0x9F, 0xDF, 0x2F, 0x6F, 0xAF, 0xEF, 0x3F, 0x7F, 0xBF, 0xFF,
+        };
+    };
+
+    if ((log2_length & 1) == 0) { // even log2: the f32 count is a power of 4
+        const rev32 = @intCast(u6, 32 - log2_length); // shift that keeps the top log2_length bits of a 32-bit value
+        var index: usize = 0;
+        while (index < length) : (index += 1) {
+            const n = index * 4; // flat f32 index of lane 0 of this vector
+            const addr = // n with its bytes swapped end to end and each byte mapped through swizzle_table
+                (@intCast(usize, static.swizzle_table[n & 0xff]) << 24) |
+                (@intCast(usize, static.swizzle_table[(n >> 8) & 0xff]) << 16) |
+                (@intCast(usize, static.swizzle_table[(n >> 16) & 0xff]) << 8) |
+                @intCast(usize, static.swizzle_table[(n >> 24) & 0xff]);
+            f32_output[addr >> rev32] = input[index][0];
+            f32_output[(0x40000000 | addr) >> rev32] = input[index][1]; // lane k sets the top two bits to k before the shift
+            f32_output[(0x80000000 | addr) >> rev32] = input[index][2];
+            f32_output[(0xC0000000 | addr) >> rev32] = input[index][3];
+        }
+    } else { // odd log2: vectors are handled in pairs
+        const rev7 = @as(usize, 1) << @intCast(u6, log2_length - 3); // f32 count / 8; spacing between consecutive lanes in the output
+        const rev32 = @intCast(u6, 32 - (log2_length - 3));
+        var index: usize = 0;
+        while (index < length) : (index += 1) {
+            const n = index / 2; // pair number; both vectors of a pair share the reversed base address
+            var addr =
+                (((@intCast(usize, static.swizzle_table[n & 0xff]) << 24) |
+                (@intCast(usize, static.swizzle_table[(n >> 8) & 0xff]) << 16) |
+                (@intCast(usize, static.swizzle_table[(n >> 16) & 0xff]) << 8) |
+                (@intCast(usize, static.swizzle_table[(n >> 24) & 0xff]))) >> rev32) |
+                ((index & 1) * rev7 * 4); // the odd vector of a pair lands rev7 * 4 entries further on
+            f32_output[addr] = input[index][0];
+            addr += rev7; // successive lanes are rev7 entries apart
+            f32_output[addr] = input[index][1];
+            addr += rev7;
+            f32_output[addr] = input[index][2];
+            addr += rev7;
+            f32_output[addr] = input[index][3];
+        }
+    }
+}
+
+/// Fills `out_unity_table` with the twiddle factors that `fft` / `ifft` pass on to `fftN`.
+///
+/// The length must be a power of two in [32, 512] (asserted). The table is written as
+/// consecutive sections of `8 * n` vectors, `n` shrinking 4x per section: four cosine rows
+/// (the first all ones) then four negated-sine rows (the first all zeros).
+/// Assumes `sincos` returns `{ sin, cos }` — TODO confirm against its definition.
+pub fn fftInitUnityTable(out_unity_table: []F32x4) void {
+    assert(std.math.isPowerOfTwo(out_unity_table.len));
+    assert(out_unity_table.len >= 32 and out_unity_table.len <= 512);
+
+    const lane_offsets = f32x4(0.0, 1.0, 2.0, 3.0);
+    var section = out_unity_table;
+    var length = out_unity_table.len / 4;
+    var angle_step = f32x4s(0.5 * math.pi / @intToFloat(f32, length));
+
+    while (true) {
+        length /= 4;
+        var sample_index = lane_offsets;
+
+        var j: u32 = 0;
+        while (j < length) : (j += 1) {
+            // Row 0 of each half is the trivial factor: 1 for the cosine, 0 for the sine.
+            section[j] = f32x4s(1.0);
+            section[j + length * 4] = f32x4s(0.0);
+
+            // Rows 1..3 use 1x, 2x and 3x the sample index, accumulated by repeated
+            // addition (x, x + x, (x + x) + x) so rounding matches the unrolled form.
+            var multiple = sample_index;
+            var row: usize = 1;
+            while (row <= 3) : (row += 1) {
+                const sin_cos = sincos(multiple * angle_step);
+                section[j + length * row] = sin_cos[1];
+                section[j + length * (row + 4)] = sin_cos[0] * f32x4s(-1.0);
+                multiple = multiple + sample_index;
+            }
+
+            sample_index += f32x4s(4.0);
+        }
+        angle_step *= f32x4s(4.0);
+        section = section[8 * length ..];
+
+        if (length <= 4)
+            break;
+    }
+}
+
+/// Forward FFT, in place, of the complex signal split across `re` and `im` (4 samples per
+/// vector). The sample count must be a power of two in [4, 512]; `unity_table` is only
+/// consulted above 16 samples, where its length must equal the sample count.
+pub fn fft(re: []F32x4, im: []F32x4, unity_table: []const F32x4) void {
+    const length = @intCast(u32, re.len * 4);
+    assert(std.math.isPowerOfTwo(length));
+    assert(length >= 4 and length <= 512);
+    assert(re.len == im.len);
+
+    // The transform runs on stack copies; unswizzling writes the result back out.
+    var re_scratch: [128]F32x4 = undefined;
+    var im_scratch: [128]F32x4 = undefined;
+    const re_work = re_scratch[0..re.len];
+    const im_work = im_scratch[0..im.len];
+    std.mem.copy(F32x4, re_work, re);
+    std.mem.copy(F32x4, im_work, im);
+    switch (length) {
+        4 => fft4(re_work, im_work, 1),
+        8 => fft8(re_work, im_work, 1),
+        16 => fft16(re_work, im_work, 1),
+        else => {
+            assert(unity_table.len == length);
+            fftN(re_work, im_work, unity_table, length, 1);
+        },
+    }
+    fftUnswizzle(re_work, re);
+    fftUnswizzle(im_work, im);
+}
+
+/// Inverse FFT of the spectrum in `re` / `im` (4 samples per vector). Only the real part
+/// is produced: it overwrites `re`, while `im` is read-only. The sample count must be a
+/// power of two in [4, 512]; above 16 samples `unity_table` must have that same length.
+pub fn ifft(re: []F32x4, im: []const F32x4, unity_table: []const F32x4) void {
+    const length = @intCast(u32, re.len * 4);
+    assert(std.math.isPowerOfTwo(length));
+    assert(length >= 4 and length <= 512);
+    assert(re.len == im.len);
+
+    var re_scratch: [128]F32x4 = undefined;
+    var im_scratch: [128]F32x4 = undefined;
+    const re_work = re_scratch[0..re.len];
+    const im_work = im_scratch[0..im.len];
+    // Scale by 1/N and negate the imaginary part before running the forward kernels.
+    const sample_count = @intToFloat(f32, length);
+    const scale_re = f32x4s(1.0 / sample_count);
+    const scale_im = f32x4s(-1.0 / sample_count);
+    for (re_work, im_work, re, im) |*re_out, *im_out, re_in, im_in| {
+        re_out.* = re_in * scale_re;
+        im_out.* = im_in * scale_im;
+    }
+    switch (length) {
+        4 => fft4(re_work, im_work, 1),
+        8 => fft8(re_work, im_work, 1),
+        16 => fft16(re_work, im_work, 1),
+        else => {
+            assert(unity_table.len == length);
+            fftN(re_work, im_work, unity_table, length, 1);
+        },
+    }
+    fftUnswizzle(re_work, re);
+}
+test "zmath.ifft" {
+    // Round-trip check: fft followed by ifft must give back the original real signal,
+    // for a table-driven size (64) and for the largest supported size (512).
+    var unity_table: [512]F32x4 = undefined;
+    const epsilon = 0.0001;
+
+    // Vector `i` of the test ramp holds the samples 4i+1, 4i+2, 4i+3 and 4i+4.
+    const ramp = struct {
+        fn at(i: usize) F32x4 {
+            const f = @intToFloat(f32, i * 4);
+            return f32x4(f + 1.0, f + 2.0, f + 3.0, f + 4.0);
+        }
+    }.at;
+
+    // 64 samples
+    {
+        // Real part: the ramp 1..32 stored twice back to back; imaginary part: zero.
+        var re: [16]F32x4 = undefined;
+        for (&re, 0..) |*v, i| {
+            v.* = ramp(i % 8);
+        }
+        var im = [_]F32x4{f32x4s(0.0)} ** 16;
+
+        fftInitUnityTable(unity_table[0..64]);
+        fft(re[0..], im[0..], unity_table[0..64]);
+
+        // Forward transform: the first vector of the real output differs from the
+        // other fifteen, which all share one pattern.
+        try expect(approxEqAbs(re[0], f32x4(1056.0, 0.0, -32.0, 0.0), epsilon));
+        for (re[1..]) |bin| {
+            try expect(approxEqAbs(bin, f32x4(-32.0, 0.0, -32.0, 0.0), epsilon));
+        }
+
+        ifft(re[0..], im[0..], unity_table[0..64]);
+
+        // Round trip: the first eight vectors (one period of the ramp) must come back.
+        // The second period is not checked, matching the original assertions.
+        for (re[0..8], 0..) |v, i| {
+            try expect(approxEqAbs(v, ramp(i), epsilon));
+        }
+    }
+
+    // 512 samples
+    {
+        // Real part: the ramp 1..512; imaginary part: zero.
+        var re: [128]F32x4 = undefined;
+        for (&re, 0..) |*v, i| {
+            v.* = ramp(i);
+        }
+        var im = [_]F32x4{f32x4s(0.0)} ** 128;
+
+        // The unity table is rebuilt because its contents depend on the length.
+        fftInitUnityTable(unity_table[0..512]);
+        fft(re[0..], im[0..], unity_table[0..512]);
+
+        // After the forward transform no vector may still look like the input ramp.
+        for (re, 0..) |v, i| {
+            try expect(!approxEqAbs(v, ramp(i), epsilon));
+        }
+
+        ifft(re[0..], im[0..], unity_table[0..512]);
+
+        // The inverse transform must restore every vector of the ramp.
+        for (re, 0..) |v, i| {
+            try expect(approxEqAbs(v, ramp(i), epsilon));
+        }
+    }
+
+    // `im` is never inspected: ifft only writes the real part.
+}
+// ------------------------------------------------------------------------------
+//
+// Private functions and constants
+//
+// ------------------------------------------------------------------------------
+// Lane masks stored as f32 bit patterns: a selected lane has the listed bits set, an
+// unselected lane is 0.
+// Sign bit (0x8000_0000) in lane 0 only.
+const f32x4_sign_mask1: F32x4 = F32x4{
+    @bitCast(f32, @as(u32, 0x8000_0000)),
+    0,
+    0,
+    0,
+};
+// All 32 bits set in lanes 0 and 1.
+const f32x4_mask2: F32x4 = F32x4{ @bitCast(f32, @as(u32, 0xffff_ffff)), @bitCast(f32, @as(u32, 0xffff_ffff)), 0, 0 };
+// All 32 bits set in lanes 0, 1 and 2.
+const f32x4_mask3: F32x4 = F32x4{ @bitCast(f32, @as(u32, 0xffff_ffff)), @bitCast(f32, @as(u32, 0xffff_ffff)), @bitCast(f32, @as(u32, 0xffff_ffff)), 0 };
+
+inline fn splatNegativeZero(comptime T: type) T { // every lane = -0.0, i.e. only the sign bit set
+    return @splat(veclen(T), @bitCast(f32, @as(u32, 0x8000_0000)));
+}
+inline fn splatNoFraction(comptime T: type) T { // every lane = 2^23, from where f32 has no fractional bits
+    return @splat(veclen(T), @as(f32, 8_388_608.0));
+}
+inline fn splatAbsMask(comptime T: type) T { // every lane = all bits except the sign bit
+    return @splat(veclen(T), @bitCast(f32, @as(u32, 0x7fff_ffff)));
+}
+
+/// Converts every lane of `v` to `i32` and back to `f32`, which drops the fractional part
+/// (1.1 -> 1.0, -4.5 -> -4.0 in the test below). Runtime safety is off: see the caveat.
+fn floatToIntAndBack(v: anytype) @TypeOf(v) {
+    // This routine won't handle nan, inf and numbers greater than 8_388_608.0 (will generate undefined values).
+    @setRuntimeSafety(false);
+    const T = @TypeOf(v);
+    const len = veclen(T);
+    var vi32: [len]i32 = undefined;
+    comptime var i: u32 = 0;
+    // vcvttps2dq
+    inline while (i < len) : (i += 1) {
+        vi32[i] = @floatToInt(i32, v[i]);
+    }
+    // Second pass: turn the integers back into f32 lanes.
+    var vf32: [len]f32 = undefined;
+    i = 0;
+    // vcvtdq2ps
+    inline while (i < len) : (i += 1) {
+        vf32[i] = @intToFloat(f32, vi32[i]);
+    }
+    // The array is returned as `@TypeOf(v)` (a vector in every caller visible here).
+    return vf32;
+}
+test "zmath.floatToIntAndBack" {
+    // Fractions are dropped toward zero in every lane of a 4-wide vector.
+    const narrow = floatToIntAndBack(f32x4(1.1, 2.9, 3.0, -4.5));
+    try expect(approxEqAbs(narrow, f32x4(1.0, 2.0, 3.0, -4.0), 0.0));
+
+    // The same holds for the 8-wide vector type.
+    const wide = floatToIntAndBack(f32x8(1.1, 2.9, 3.0, -4.5, 2.5, -2.5, 1.1, -100.2));
+    try expect(approxEqAbs(wide, f32x8(1.0, 2.0, 3.0, -4.0, 2.0, -2.0, 1.0, -100.0), 0.0));
+
+    // Non-finite lanes give unspecified results, so only the finite lane is checked:
+    // it must be unaffected by its neighbours.
+    const mixed = floatToIntAndBack(f32x4(math.inf_f32, 2.9, math.nan_f32, math.qnan_f32));
+    try expect(mixed[1] == 2.0);
+}
+
+/// Lane-wise absolute comparison: returns true only when every lane of `v0` is within
+/// `eps` of the matching lane of `v1`, as judged by `math.approxEqAbs`.
+pub fn approxEqAbs(v0: anytype, v1: anytype, eps: f32) bool {
+    const lanes = comptime veclen(@TypeOf(v0, v1));
+    comptime var lane: comptime_int = 0;
+    inline while (lane < lanes) : (lane += 1) {
+        if (!math.approxEqAbs(f32, v0[lane], v1[lane], eps)) return false;
+    }
+    return true;
+}
+
+// ------------------------------------------------------------------------------
+// This software is available under 2 licenses -- choose whichever you prefer.
+// ------------------------------------------------------------------------------
+// ALTERNATIVE A - MIT License
+// Copyright (c) 2022 Michal Ziulek and Contributors
+// Permission is hereby granted, free of charge, to any person obtaining a copy of
+// this software and associated documentation files (the "Software"), to deal in
+// the Software without restriction, including without limitation the rights to
+// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+// of the Software, and to permit persons to whom the Software is furnished to do
+// so, subject to the following conditions:
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+// ------------------------------------------------------------------------------
+// ALTERNATIVE B - Public Domain (www.unlicense.org)
+// This is free and unencumbered software released into the public domain.
+// Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
+// software, either in source code form or as a compiled binary, for any purpose,
+// commercial or non-commercial, and by any means.
+// In jurisdictions that recognize copyright laws, the author or authors of this
+// software dedicate any and all copyright interest in the software to the public
+// domain. We make this dedication for the benefit of the public at large and to
+// the detriment of our heirs and successors. We intend this dedication to be an
+// overt act of relinquishment in perpetuity of all present and future rights to
+// this software under copyright law.
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+// ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+// ------------------------------------------------------------------------------
diff --git a/src/c.zig b/src/c.zig
new file mode 100644
index 0000000..b2aa51f
--- /dev/null
+++ b/src/c.zig
@@ -0,0 +1,9 @@
+pub usingnamespace @cImport({
+    @cInclude("glad/glad.h"); // NOTE(review): presumably has to precede GLFW so glad supplies the GL declarations — confirm
+    @cInclude("GLFW/glfw3.h");
+    // By single-header-library convention the *_IMPLEMENTATION macros make the loaders emit their bodies here.
+    @cDefine("STB_IMAGE_IMPLEMENTATION", "");
+    @cDefine("TINYOBJ_LOADER_C_IMPLEMENTATION", "");
+    @cInclude("loaders/stb_image.h");
+    @cInclude("loaders/tinyobj.h");
+});
diff --git a/src/gfx/Color.zig b/src/gfx/Color.zig
new file mode 100644
index 0000000..2491d4c
--- /dev/null
+++ b/src/gfx/Color.zig
@@ -0,0 +1,43 @@
+/// Evaluates one RGB channel of an HSL colour for the hue offset `t`.
+/// `p` and `q` are the two lightness-derived levels that the channel ramps between.
+/// `t` is wrapped by at most one turn, so it should lie in [-1, 2].
+fn hue_to_rgb(p: f32, q: f32, t: f32) f32 {
+    // Parameters are immutable in Zig, so the wrapped hue lives in a local.
+    const wrapped = if (t < 0) t + 1 else if (t > 1) t - 1 else t;
+    if (wrapped < 1.0 / 6.0) return p + (q - p) * 6 * wrapped;
+    if (wrapped < 1.0 / 2.0) return q;
+    if (wrapped < 2.0 / 3.0) return p + (q - p) * (2.0 / 3.0 - wrapped) * 6;
+    return p;
+}
+
+/// Converts an HSL colour to an RGB triple (the `hex` in the name notwithstanding).
+/// `h` is divided by 360 and `s` / `l` by 100 before use, i.e. degrees and percentages.
+/// NOTE(review): channels presumably end up in [0, 1] for in-range inputs — confirm.
+fn hsl_to_hex(h: f32, s: f32, l: f32) @Vector(3, f32) {
+    // Parameters are immutable in Zig, so normalise into fresh locals.
+    const hue = h / 360.0;
+    const sat = s / 100.0;
+    const light = l / 100.0;
+
+    // Zero saturation is a pure grey: every channel equals the lightness.
+    if (sat == 0) return @Vector(3, f32){ light, light, light };
+
+    const q = if (light < 0.5) light * (1 + sat) else light + sat - light * sat;
+    const p = 2 * light - q;
+    return @Vector(3, f32){
+        hue_to_rgb(p, q, hue + 1.0 / 3.0),
+        hue_to_rgb(p, q, hue),
+        hue_to_rgb(p, q, hue - 1.0 / 3.0),
+    };
+}
+
+/// Picks a colour for `index`: six evenly spaced hues per cycle, with a hue offset from the
+/// second cycle on and a lightness of 100 / (2 + floor(index / 12)) percent.
+pub fn color_from_index(index: i32) @Vector(3, f32) {
+    const position = @intToFloat(f32, index); // integer / float does not compile in Zig
+    const color_wheel_cycle = @floor(position / 6.0);
+    const spacing: f32 = 360.0 / 6.0;
+    const offset: f32 = if (color_wheel_cycle == 0) 0 else spacing / (color_wheel_cycle + 2);
+    const hue = spacing * @intToFloat(f32, @mod(index, 6)) + offset; // `%` is rejected for signed ints
+    return hsl_to_hex(hue, 100.0, 1.0 / (2 + @floor(position / 12.0)) * 100);
+}
diff --git a/src/gfx/Mesh.zig b/src/gfx/Mesh.zig
new file mode 100644
index 0000000..bac0bf5
--- /dev/null
+++ b/src/gfx/Mesh.zig
@@ -0,0 +1,94 @@
+const std = @import("std");
+const c = @import("../c.zig");
+const djleddaGeom = @import("djleddaGeom.zig");
+
+/// GL object names for one drawable mesh; every handle defaults to 0 (no object).
+pub const Mesh = struct {
+    vao: c_uint = 0,
+    vbo_xyz: c_uint = 0,
+    vbo_uv: c_uint = 0,
+    vbo_norm: c_uint = 0, // not created by from_shape: Shape carries no normals
+    ebo: c_uint = 0,
+    num_indices: c_uint = 0,
+
+    /// Uploads `shape` into new GL buffers and returns the handles (the old `void` version dropped them).
+    pub fn from_shape(shape: *const djleddaGeom.Shape) Mesh {
+        var mesh = Mesh{ .num_indices = @intCast(c_uint, shape.indices.len) };
+        c.glGenVertexArrays(1, &mesh.vao);
+        c.glGenBuffers(1, &mesh.vbo_xyz);
+        c.glGenBuffers(1, &mesh.vbo_uv);
+        c.glGenBuffers(1, &mesh.ebo);
+        c.glBindVertexArray(mesh.vao);
+        // Attribute 0: packed vec3 positions. Buffer sizes are byte counts (len * element size).
+        c.glBindBuffer(c.GL_ARRAY_BUFFER, mesh.vbo_xyz);
+        c.glBufferData(c.GL_ARRAY_BUFFER, @intCast(c.GLsizeiptr, shape.xyz.len * @sizeOf(f32)), shape.xyz.ptr, c.GL_STATIC_DRAW);
+        c.glVertexAttribPointer(0, 3, c.GL_FLOAT, c.GL_FALSE, 3 * @sizeOf(f32), null);
+        c.glEnableVertexAttribArray(0);
+        // Attribute 1: packed vec2 texture coordinates.
+        c.glBindBuffer(c.GL_ARRAY_BUFFER, mesh.vbo_uv);
+        c.glBufferData(c.GL_ARRAY_BUFFER, @intCast(c.GLsizeiptr, shape.uv.len * @sizeOf(f32)), shape.uv.ptr, c.GL_STATIC_DRAW);
+        c.glVertexAttribPointer(1, 2, c.GL_FLOAT, c.GL_FALSE, 2 * @sizeOf(f32), null);
+        c.glEnableVertexAttribArray(1);
+        c.glBindBuffer(c.GL_ELEMENT_ARRAY_BUFFER, mesh.ebo);
+        c.glBufferData(c.GL_ELEMENT_ARRAY_BUFFER, @intCast(c.GLsizeiptr, shape.indices.len * @sizeOf(c_uint)), shape.indices.ptr, c.GL_STATIC_DRAW);
+        return mesh;
+    }
+
+//    pub fn init(obj_file: *[]const u8) void {
+//        const reader = c.tinyobj.ObjReader();
+//        const success = reader.ParseFromFile(obj_file);
+//        std.debug.print("{}\n", .{reader.Error()});
+//
+//        const attrib = reader.GetAttrib();
+//
+//        const indices_t = reader.GetShapes().at(0).mesh.indices;
+//        const indices = ArrayList(c_uint)(indices_t.size());
+//
+//        const vertices = ArrayList()(3*indices_t.size());
+//        const normals = ArrayList()(3*indices_t.size());
+//        const texcoords = ArrayList()(2*indices_t.size());
+//
+//        for (int i = 0; i < indices_t.size(); i++) {
+//            const vertex_data = indices_t[i];
+//            vertices[3*i] = attrib.vertices[3*vertex_data.vertex_index];
+//            vertices[3*i+1] = attrib.vertices[3*vertex_data.vertex_index + 1];
+//            vertices[3*i+2] = attrib.vertices[3*vertex_data.vertex_index + 2];
+//
+//            normals[3*i] = attrib.normals[3*vertex_data.normal_index];
+//            normals[3*i+1] = attrib.normals[3*vertex_data.normal_index + 1];
+//            normals[3*i+2] = attrib.normals[3*vertex_data.normal_index + 2];
+//
+//            texcoords[2*i] = attrib.texcoords[2*vertex_data.texcoord_index];
+//            texcoords[2*i+1] = attrib.texcoords[2*vertex_data.texcoord_index + 1];
+//
+//            indices[i] = i;
+//        }
+//
+//        num_indices = indices_t.size();
+//        glGenVertexArrays(1, &vao);
+//        glGenBuffers(1, &vbo_xyz);
+//        glGenBuffers(1, &vbo_uv);
+//        glGenBuffers(1, &vbo_norm);
+//        //glGenBuffers(1, &ebo);
+//
+//        glBindVertexArray(vao);
+//
+//        glBindBuffer(GL_ARRAY_BUFFER, vbo_xyz);
+//        glBufferData(GL_ARRAY_BUFFER, vertices.size() * sizeof(float), vertices.data(), GL_STATIC_DRAW);
+//        glVertexAttribPointer(0, 3, GL_FLOAT, GL_FALSE, 3 * sizeof(float), (void*)0);
+//        glEnableVertexAttribArray(0);
+//
+//        glBindBuffer(GL_ARRAY_BUFFER, vbo_uv);
+//        glBufferData(GL_ARRAY_BUFFER, texcoords.size() * sizeof(float), texcoords.data(), GL_STATIC_DRAW);
+//        glVertexAttribPointer(1, 2, GL_FLOAT, GL_FALSE, 2 * sizeof(float), (void*)0);
+//        glEnableVertexAttribArray(1);
+//
+//        glBindBuffer(GL_ARRAY_BUFFER, vbo_norm);
+//        glBufferData(GL_ARRAY_BUFFER, normals.size() * sizeof(float), normals.data(), GL_STATIC_DRAW);
+//        glVertexAttribPointer(2, 3, GL_FLOAT, GL_FALSE, 3 * sizeof(float), (void*)0);
+//        glEnableVertexAttribArray(2);
+//
+//        //glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, ebo);
+//        //glBufferData(GL_ELEMENT_ARRAY_BUFFER, indices.size() * sizeof(unsigned int), indices.data(), GL_STATIC_DRAW);
+//    }
+};
diff --git a/src/gfx/Shader.zig b/src/gfx/Shader.zig
new file mode 100644
index 0000000..26bcce5
--- /dev/null
+++ b/src/gfx/Shader.zig
@@ -0,0 +1,56 @@
+const c = @import("../c.zig");
+const std = @import("std");
+
+const ShaderType = enum(u32) { // tags carry the GL constants that create_shader hands to glCreateShader
+    fragment = c.GL_FRAGMENT_SHADER,
+    vertex = c.GL_VERTEX_SHADER,
+};
+
+/// Loads the GLSL file at `file_path` (absolute, or relative to the working directory),
+/// compiles it as `shader_type` and returns the GL shader name; returns 0 if the file
+/// cannot be read. `info_log` is scratch space for the driver's compile message.
+fn create_shader(file_path: []const u8, shader_type: ShaderType, info_log: []u8, allocator: *std.mem.Allocator) c_uint {
+    const source = std.fs.cwd().readFileAlloc(allocator.*, file_path, 1024 * 1024) catch |err| {
+        std.debug.print("ERROR::SHADER::FILE_NOT_READ\n{s}: {s}\n", .{ file_path, @errorName(err) });
+        return 0;
+    };
+    defer allocator.free(source);
+    // The buffer is not NUL-terminated, so hand GL an explicit length.
+    const source_ptr = @ptrCast([*c]const c.GLchar, source.ptr);
+    const source_len = @intCast(c.GLint, source.len);
+    const shader = c.glCreateShader(@enumToInt(shader_type));
+    c.glShaderSource(shader, 1, &source_ptr, &source_len);
+    c.glCompileShader(shader);
+    var success: c.GLint = 0;
+    c.glGetShaderiv(shader, c.GL_COMPILE_STATUS, &success);
+    if (success == 0) { // GL_FALSE means the compile failed (the old `!= 0` test was inverted)
+        c.glGetShaderInfoLog(shader, @intCast(c.GLsizei, info_log.len), null, @ptrCast([*c]c.GLchar, info_log.ptr));
+        const shader_type_name = if (shader_type == ShaderType.fragment) "FRAGMENT" else "VERTEX";
+        std.debug.print("ERROR::SHADER::{s}::COMPILATION_FAILED\n{s}\n", .{ shader_type_name, std.mem.sliceTo(info_log, 0) });
+    }
+    return shader;
+}
+
+/// A linked GL program built from one vertex and one fragment shader.
+pub const Shader = struct {
+    prog_id: c_uint,
+
+    /// Compiles both stages, links them into `self.prog_id` and prints any link error.
+    pub fn init(self: *Shader, vertex_path: *const []const u8, fragment_path: *const []const u8, allocator: *std.mem.Allocator) void {
+        var info_log = [_]u8{0} ** 512;
+        const vertex_shader = create_shader(vertex_path.*, ShaderType.vertex, &info_log, allocator);
+        const fragment_shader = create_shader(fragment_path.*, ShaderType.fragment, &info_log, allocator);
+        self.prog_id = c.glCreateProgram();
+        c.glAttachShader(self.prog_id, vertex_shader);
+        c.glAttachShader(self.prog_id, fragment_shader);
+        c.glLinkProgram(self.prog_id);
+        var success: c.GLint = 0;
+        c.glGetProgramiv(self.prog_id, c.GL_LINK_STATUS, &success);
+        if (success == 0) {
+            c.glGetProgramInfoLog(self.prog_id, info_log.len, null, @ptrCast([*c]c.GLchar, &info_log));
+            std.debug.print("ERROR::SHADER::PROGRAM::LINK_FAILED\n{s}\n", .{std.mem.sliceTo(&info_log, 0)});
+        }
+        c.glDeleteShader(vertex_shader);
+        c.glDeleteShader(fragment_shader);
+    }
+};
diff --git a/src/gfx/djleddaGeom.zig b/src/gfx/djleddaGeom.zig
new file mode 100644
index 0000000..eb95d8e
--- /dev/null
+++ b/src/gfx/djleddaGeom.zig
@@ -0,0 +1,57 @@
+// Buffer layout:
+// X, Y, Z, U, V
+
+/// CPU-side geometry: draw indices plus separate texture-coordinate and position arrays.
+pub const Shape = struct {
+    indices: []const c_uint,
+    uv: []const f32,
+    xyz: []const f32,
+};
+
+const triangle_vertices = [_]f32{
+    -0.5, -0.5, 0.0, 1.0, 1.0,
+    0.5,  -0.5, 0.0, 0.5, 0.5,
+    0.0,  0.5,  0.0, 0.0, 0.0,
+};
+const triangle_indices = [_]c_uint{ 0, 1, 2 };
+
+const cube_vertices = [_]f32{ -0.5, -0.5, -0.5, 0.0, 0.0, 0.5, -0.5, -0.5, 1.0, 0.0, 0.5, 0.5, -0.5, 1.0, 1.0, 0.5, 0.5, -0.5, 1.0, 1.0, -0.5, 0.5, -0.5, 0.0, 1.0, -0.5, -0.5, -0.5, 0.0, 0.0, -0.5, -0.5, 0.5, 0.0, 0.0, 0.5, -0.5, 0.5, 1.0, 0.0, 0.5, 0.5, 0.5, 1.0, 1.0, 0.5, 0.5, 0.5, 1.0, 1.0, -0.5, 0.5, 0.5, 0.0, 1.0, -0.5, -0.5, 0.5, 0.0, 0.0, -0.5, 0.5, 0.5, 1.0, 0.0, -0.5, 0.5, -0.5, 1.0, 1.0, -0.5, -0.5, -0.5, 0.0, 1.0, -0.5, -0.5, -0.5, 0.0, 1.0, -0.5, -0.5, 0.5, 0.0, 0.0, -0.5, 0.5, 0.5, 1.0, 0.0, 0.5, 0.5, 0.5, 1.0, 0.0, 0.5, 0.5, -0.5, 1.0, 1.0, 0.5, -0.5, -0.5, 0.0, 1.0, 0.5, -0.5, -0.5, 0.0, 1.0, 0.5, -0.5, 0.5, 0.0, 0.0, 0.5, 0.5, 0.5, 1.0, 0.0, -0.5, -0.5, -0.5, 0.0, 1.0, 0.5, -0.5, -0.5, 1.0, 1.0, 0.5, -0.5, 0.5, 1.0, 0.0, 0.5, -0.5, 0.5, 1.0, 0.0, -0.5, -0.5, 0.5, 0.0, 0.0, -0.5, -0.5, -0.5, 0.0, 1.0, -0.5, 0.5, -0.5, 0.0, 1.0, 0.5, 0.5, -0.5, 1.0, 1.0, 0.5, 0.5, 0.5, 1.0, 0.0, 0.5, 0.5, 0.5, 1.0, 0.0, -0.5, 0.5, 0.5, 0.0, 0.0, -0.5, 0.5, -0.5, 0.0, 1.0 };
+const cube_indices = [_]c_uint{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31 };
+
+const square_xyz = [_]f32{
+    0.5,  0.5,  0.0,
+    0.5,  -0.5, 0.0,
+    -0.5, -0.5, 0.0,
+    -0.5, 0.5,  0.0,
+};
+
+const square_uv = [_]f32{
+    1.0, 1.0,
+    1.0, 0.0,
+    0.0, 0.0,
+    0.0, 1.0,
+};
+
+const square_indices = [_]c_uint{
+    0, 1, 3,
+    1, 2, 3,
+};
+
+// NOTE(review): triangle_vertices is interleaved X, Y, Z, U, V but is used as both `uv` and `xyz` — confirm intent.
+pub const TRIANGLE = Shape{
+    .indices = &triangle_indices,
+    .uv = &triangle_vertices,
+    .xyz = &triangle_vertices,
+};
+
+pub const SQUARE = Shape{
+    .indices = &square_indices,
+    .uv = &square_uv,
+    .xyz = &square_xyz,
+};
+// Was wired to the triangle table (copy-paste). NOTE(review): cube_vertices is interleaved too — needs splitting.
+pub const CUBE = Shape{
+    .indices = &cube_indices,
+    .uv = &cube_vertices,
+    .xyz = &cube_vertices,
+};
diff --git a/src/main.cpp b/src/main.cpp
index b88847c..44d9521 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -1,6 +1,5 @@
 #include <bitset>
 #include <array>
-#include <glm/ext/matrix_transform.hpp>
 #include <span>
 #include <cstdint>
 #include <iostream>
@@ -10,6 +9,7 @@
 #include <optional>
 
 #include "glad/glad.h"
+#include <glm/ext/matrix_transform.hpp>
 #include <GLFW/glfw3.h>
 #include <glm/glm.hpp>
 #include <glm/gtx/quaternion.hpp>
diff --git a/src/main.zig b/src/main.zig
new file mode 100644
index 0000000..7214248
--- /dev/null
+++ b/src/main.zig
@@ -0,0 +1,402 @@
+const std = @import("std");
+const c = @import("c.zig");
+const zm = @import("zm");
+
+const Mesh = @import("gfx/Mesh.zig").Mesh;
+
+const ArrayList = std.ArrayList;
+
+/// Prints the four rows of `matrix`, one comma-separated row per line, via `std.debug.print`.
+fn print_mat(matrix: *const zm.Mat) void {
+    for (matrix) |row| {
+        std.debug.print("{}, {}, {}, {}\n", .{ row[0], row[1], row[2], row[3] });
+    }
+}
+
+/// Perspective camera state; `view` is only refreshed by `look_at`.
+const Camera = struct {
+    view: zm.Mat = .{ zm.f32x4s(0.0), zm.f32x4s(0.0), zm.f32x4s(0.0), zm.f32x4s(0.0) },
+    proj: zm.Mat,
+    pos: zm.Vec = zm.f32x4s(0.0),
+    up: zm.Vec = zm.f32x4s(0.0),
+    target: zm.Vec,
+    /// (Re)builds `proj`: 45 degree field of view (plain arithmetic, no std helper), near 0.1, far 100.
+    pub fn init(self: *Camera, aspect_ratio: f32) void {
+        self.proj = zm.perspectiveFovRh(45.0 * std.math.pi / 180.0, aspect_ratio, 0.1, 100.0);
+    }
+    /// Returns a camera with a zero target and its projection already initialised.
+    pub fn new(aspect_ratio: f32) Camera {
+        var cam = Camera{ .proj = undefined, .target = zm.f32x4s(0.0) };
+        cam.init(aspect_ratio);
+        return cam;
+    }
+    /// Aims the camera at (x, y, z) and rebuilds `view` from `pos`, the target and `up`.
+    pub fn look_at(self: *Camera, x: f32, y: f32, z: f32) void {
+        self.target = zm.f32x4(x, y, z, 0.0);
+        self.view = zm.lookAtRh(self.pos, self.target, self.up);
+    }
+    /// Stores a new up direction; `view` is not rebuilt until the next `look_at`.
+    pub fn set_up(self: *Camera, up_x: f32, up_y: f32, up_z: f32) void {
+        self.up = zm.f32x4(up_x, up_y, up_z, 0.0);
+    }
+};
+
+const GlobalAppState = struct { // process-wide UI / render state; defaults make the `.{}` below valid
+    current_polycube: i32 = 0,
+    last_polycube_visible: i32 = 0,
+    active_shader: ?*Shader = null,
+    polycubes: ArrayList(Polycube) = ArrayList(Polycube).init(std.heap.c_allocator),
+};
+// Must be `var`: process_input writes `current_polycube`.
+var app_state: GlobalAppState = .{};
+
+const WindowDims = struct { // window size that init_window_and_gl passes to glfwCreateWindow
+    width: u32,
+    height: u32,
+};
+
+const Entity = struct { // drawable instance; every field is defaulted so the `.{}` in new_entity compiles
+    mesh: *Mesh = undefined, // must be assigned before the entity is drawn
+    tex: *Texture = undefined, // must be assigned before the entity is drawn
+    visible: bool = false,
+    scene_graph_node: i32 = 0, // 1-based node id; 0 = not linked yet
+};
+
+/// One node of the transform hierarchy. Every field has a default so that the `.{}`
+/// literals used when appending nodes compile.
+const SceneGraphNode = struct {
+    local: zm.Mat = zm.identity(),
+    world: zm.Mat = zm.identity(),
+    translation: zm.Vec = zm.f32x4s(0.0),
+    rotation: zm.Quat = zm.f32x4(0.0, 0.0, 0.0, 1.0), // identity quaternion (x, y, z, w)
+    scale: zm.Vec = zm.f32x4(1.0, 1.0, 1.0, 0.0),
+    children: ArrayList(i32) = ArrayList(i32).init(std.heap.c_allocator), // ids of child nodes
+    entity: ?i32 = null,
+
+    /// Restores unit scale, zero translation and the identity rotation.
+    pub fn reset(self: *SceneGraphNode) void {
+        self.scale = zm.f32x4(1.0, 1.0, 1.0, 0.0);
+        self.translation = zm.f32x4s(0.0);
+        self.rotation = zm.f32x4(0.0, 0.0, 0.0, 1.0);
+    }
+
+    /// Resets the transform components and sets both matrices to identity.
+    pub fn init(self: *SceneGraphNode) void {
+        self.reset();
+        self.local = zm.identity();
+        self.world = self.local;
+    }
+
+    /// Rebuilds `local` as scale, then rotation, then translation. zmath multiplies left
+    /// to right, so the first operand of `mul` is applied first. The leftover C++
+    /// expression that used to follow the Zig version has been removed.
+    pub fn update_local(self: *SceneGraphNode) void {
+        const scaling = zm.scalingV(self.scale);
+        const rotation = zm.quatToMat(self.rotation);
+        const translation = zm.translationV(self.translation);
+        self.local = zm.mul(zm.mul(scaling, rotation), translation);
+    }
+};
+
+
+/// A piece identified by a scene-graph node; the node's children carry the entities.
+const Polycube = struct {
+    graph_node: i32,
+    color: zm.Vec,
+
+    /// Makes every child entity of this piece visible.
+    pub fn show(self: Polycube) void {
+        self.set_visible(true);
+    }
+
+    /// Hides every child entity of this piece.
+    pub fn hide(self: Polycube) void {
+        self.set_visible(false);
+    }
+
+    /// Shared body of `show` / `hide`; children without an entity (or with a stale id) are skipped.
+    fn set_visible(self: Polycube, visible: bool) void {
+        for (get_scene_graph_node(self.graph_node).children.items) |child_id| {
+            const entity_id = get_scene_graph_node(child_id).entity orelse continue;
+            if (get_entity(entity_id)) |entity| entity.visible = visible;
+        }
+    }
+
+    /// Returns the mean translation of the child nodes (NaN lanes if there are no children).
+    pub fn get_centre(self: Polycube) zm.Vec {
+        const children = get_scene_graph_node(self.graph_node).children.items;
+        var centre = zm.f32x4s(0.0);
+        for (children) |child_id| {
+            centre += get_scene_graph_node(child_id).translation;
+        }
+        return centre / zm.f32x4s(@intToFloat(f32, children.len));
+    }
+};
+
+/// Viewport rectangle paired with the camera that renders into it.
+const Frame = struct {
+    width: i32,
+    height: i32,
+    x: i32,
+    y: i32,
+    cam: *Camera,
+
+    /// Builds a frame at offset (0, 0) — the old code left x/y/width/height unset — and initialises `camera`.
+    pub fn new(camera: *Camera, width: i32, height: i32) Frame {
+        camera.init(@intToFloat(f32, width) / @intToFloat(f32, height));
+        return Frame{ .width = width, .height = height, .x = 0, .y = 0, .cam = camera };
+    }
+};
+
+fn framebuffer_size_callback(_: ?*c.GLFWwindow, width: c_int, height: c_int) callconv(.C) void {
+    c.glViewport(0, 0, width, height); // GLFW framebuffer-size callback: resize the GL viewport to match
+}
+
+/// Opens a GL 4.6 core window of `window_dims` size, loads GL via glad; null (after logging) on failure.
+fn init_window_and_gl(window_dims: *WindowDims) ?*c.GLFWwindow {
+    const width = @intCast(c_int, window_dims.width);
+    const height = @intCast(c_int, window_dims.height);
+    _ = c.glfwInit(); // a failed init surfaces below: glfwCreateWindow then returns null
+    c.glfwWindowHint(c.GLFW_CONTEXT_VERSION_MAJOR, 4);
+    c.glfwWindowHint(c.GLFW_CONTEXT_VERSION_MINOR, 6);
+    c.glfwWindowHint(c.GLFW_OPENGL_PROFILE, c.GLFW_OPENGL_CORE_PROFILE);
+    const window = c.glfwCreateWindow(width, height, "Somaesque", null, null) orelse {
+        std.debug.print("Failed to create GLFW window", .{});
+        c.glfwTerminate();
+        return null;
+    };
+    c.glfwMakeContextCurrent(window);
+    if (c.gladLoadGLLoader(@ptrCast(c.GLADloadproc, &c.glfwGetProcAddress)) == 0) {
+        std.debug.print("Failed to initialize GLAD", .{});
+        return null;
+    }
+    c.glViewport(0, 0, width, height); // was hard-coded to 800x600
+    _ = c.glfwSetFramebufferSizeCallback(window, framebuffer_size_callback);
+    c.glEnable(c.GL_DEPTH_TEST);
+    return window;
+}
+
+fn gl_update_viewport(window_dims: *WindowDims, frame: *Frame) void { // flips y: GL measures from the bottom edge
+    c.glViewport(frame.x, @intCast(i32, window_dims.height) - frame.y - frame.height, frame.width, frame.height);
+}
+
+var cube_mesh = std.mem.zeroes(Mesh); // all GL handles 0; `var` because entities store `*Mesh`
+var wall_tex: Texture = undefined; // NOTE(review): must be loaded before first draw — confirm in main
+var entities = ArrayList(Entity).init(std.heap.c_allocator); // entity id = index + 1
+var scene_graph_nodes = ArrayList(SceneGraphNode).init(std.heap.c_allocator); // node id = index + 1
+
+/// Polls the keyboard: Escape requests close, Space toggles wireframe rendering, Enter
+/// advances `app_state.current_polycube` (wrapping from 6 back to 0).
+fn process_input(window: *c.GLFWwindow) void {
+    // Container-level vars keep their values between calls, like C function statics.
+    const static = struct {
+        var wireframe: bool = false;
+        var last_frame_state_press_enter: bool = false;
+        var last_frame_state_press: bool = false;
+    };
+    if (c.glfwGetKey(window, c.GLFW_KEY_ESCAPE) == c.GLFW_PRESS) {
+        c.glfwSetWindowShouldClose(window, c.GLFW_TRUE);
+    }
+    // Both keys act on the press edge only; the flag re-arms on release.
+    const space = c.glfwGetKey(window, c.GLFW_KEY_SPACE);
+    if (space == c.GLFW_PRESS and !static.last_frame_state_press) {
+        static.wireframe = !static.wireframe;
+        const mode: c.GLenum = if (static.wireframe) c.GL_LINE else c.GL_FILL;
+        c.glPolygonMode(c.GL_FRONT_AND_BACK, mode);
+        static.last_frame_state_press = true;
+    } else if (space == c.GLFW_RELEASE) {
+        static.last_frame_state_press = false;
+    }
+    const enter = c.glfwGetKey(window, c.GLFW_KEY_ENTER);
+    if (enter == c.GLFW_PRESS and !static.last_frame_state_press_enter) {
+        app_state.current_polycube = if (app_state.current_polycube == 6) 0 else app_state.current_polycube + 1;
+        static.last_frame_state_press_enter = true;
+    } else if (enter == c.GLFW_RELEASE) {
+        static.last_frame_state_press_enter = false;
+    }
+}
+
+
+fn new_entity() i32 { // appends an entity and its scene-graph node, cross-linked by 1-based id; returns the entity id
+    entities.append(.{}) catch @panic("out of memory");
+    scene_graph_nodes.append(.{}) catch @panic("out of memory");
+    entities.items[entities.items.len - 1].scene_graph_node = @intCast(i32, scene_graph_nodes.items.len);
+    scene_graph_nodes.items[scene_graph_nodes.items.len - 1].entity = @intCast(i32, entities.items.len);
+    return @intCast(i32, entities.items.len);
+}
+
+/// Looks up an entity by its 1-based id.
+///
+/// Returns null when `id` is zero, negative, or larger than the number of
+/// entities. The returned pointer refers into `entities.items`, so it should
+/// not be kept across a later append to that list.
+fn get_entity(id: i32) ?*Entity {
+    // A negative id fails the conversion and is reported as "not found".
+    const one_based = std.math.cast(usize, id) orelse return null;
+    if (one_based == 0 or one_based > entities.items.len) return null;
+    return &entities.items[one_based - 1];
+}
+
+/// Returns the scene-graph node with the given 1-based id.
+///
+/// The return type is non-optional and callers use the result directly, so an
+/// id that is not in range is treated as a programming error and panics
+/// rather than returning null. The returned pointer refers into
+/// `scene_graph_nodes.items`, so it should not be kept across a later append
+/// to that list.
+fn get_scene_graph_node(id: i32) *SceneGraphNode {
+    const one_based = std.math.cast(usize, id) orelse
+        @panic("get_scene_graph_node: negative id");
+    if (one_based == 0 or one_based > scene_graph_nodes.items.len) {
+        @panic("get_scene_graph_node: id out of range");
+    }
+    return &scene_graph_nodes.items[one_based - 1];
+}
+
+/// Appends a default-initialised scene-graph node that is not linked to any
+/// entity and returns its 1-based id (the list length after the append).
+///
+/// Panics if the list cannot grow or the id does not fit in `i32`; the
+/// signature gives callers no error channel for either case.
+fn new_graph_node() i32 {
+    scene_graph_nodes.append(.{}) catch @panic("new_graph_node: out of memory");
+    return std.math.cast(i32, scene_graph_nodes.items.len) orelse
+        @panic("new_graph_node: scene graph node id does not fit in i32");
+}
+
+/// Draws one entity with the active shader: uploads the world matrix of the
+/// entity's scene-graph node as the "model" uniform, binds the entity's
+/// texture and vertex array, and issues a non-indexed triangle draw.
+fn draw_entity(entity: *Entity) void {
+    // GL wants a pointer to 16 contiguous floats; a zmath matrix is an array
+    // of SIMD vectors, so flatten it into a plain float array first.
+    const world = zm.matToArr(get_scene_graph_node(entity.scene_graph_node).world);
+    const model_uniform_loc = c.glGetUniformLocation(app_state.active_shader.prog_id, "model");
+    c.glUniformMatrix4fv(model_uniform_loc, 1, c.GL_FALSE, &world);
+    c.glBindTexture(c.GL_TEXTURE_2D, entity.tex.tex_id);
+    c.glBindVertexArray(entity.mesh.vao);
+    c.glDrawArrays(c.GL_TRIANGLES, 0, entity.mesh.num_indices);
+    //c.glDrawElements(c.GL_TRIANGLES, entity.mesh.num_indices, c.GL_UNSIGNED_INT, 0);
+}
+
+/// Builds the scene-graph representation of a polycube from a voxel grid.
+///
+/// A parent node is created for the polycube, and every filled voxel of
+/// `repr` becomes one cube entity (shared `cube_mesh` / `wall_tex`) whose
+/// node is registered as a child of that parent. Cubes are translated so the
+/// grid is centred on the parent's origin, with the voxel axes mapped as
+/// z -> first component, x -> negated second component, y -> third component.
+///
+/// Returns a `Polycube` referring to the parent node, coloured white.
+/// Panics if a child list cannot grow.
+fn create_polycube_from_repr(repr: *Voxel.Space) Polycube {
+    const polycube_id = new_graph_node();
+    get_scene_graph_node(polycube_id).init();
+
+    // Half extents of the grid; subtracting them centres indices 0..dim-1.
+    const half_x = (std.math.lossyCast(f32, repr.dim_x) - 1.0) / 2.0;
+    const half_y = (std.math.lossyCast(f32, repr.dim_y) - 1.0) / 2.0;
+    const half_z = (std.math.lossyCast(f32, repr.dim_z) - 1.0) / 2.0;
+
+    // Each counter is declared inside its enclosing loop so it restarts for
+    // every iteration of the outer loop, and starts at 0 so the first layer
+    // along each axis is included.
+    var x: usize = 0;
+    while (x < repr.dim_x) : (x += 1) {
+        var y: usize = 0;
+        while (y < repr.dim_y) : (y += 1) {
+            var z: usize = 0;
+            while (z < repr.dim_z) : (z += 1) {
+                if (!Voxel.filledAt(repr, x, y, z)) continue;
+
+                // The id was handed out on this line, so the lookup cannot miss.
+                const segment = get_entity(new_entity()).?;
+                segment.mesh = &cube_mesh;
+                segment.tex = &wall_tex;
+
+                const segment_node = get_scene_graph_node(segment.scene_graph_node);
+                segment_node.init();
+                segment_node.translation = zm.f32x4(
+                    std.math.lossyCast(f32, z) - half_z,
+                    half_x - std.math.lossyCast(f32, x),
+                    std.math.lossyCast(f32, y) - half_y,
+                    0.0,
+                );
+                segment_node.update_local();
+
+                // Looked up again rather than cached: new_entity() appended to
+                // the node list since the parent was created.
+                get_scene_graph_node(polycube_id).children.append(segment.scene_graph_node) catch
+                    @panic("create_polycube_from_repr: out of memory");
+            }
+        }
+    }
+
+    return Polycube{
+        .graph_node = polycube_id,
+        .color = zm.f32x4s(1.0),
+    };
+}
+
+/// Recomputes the transforms of every node below `top`, depth-first.
+///
+/// For each child, its local transform is refreshed, its world transform is
+/// set to the product of `top.world` and the child's local transform, and the
+/// child's own subtree is then processed. `top.world` itself is read, never
+/// written. A node without children is a no-op.
+fn recalculate_scene_graph(top: *SceneGraphNode) void {
+    for (top.children.items) |child_id| {
+        const child = get_scene_graph_node(child_id);
+        child.update_local();
+        // NOTE(review): zmath documents a row-vector convention; confirm that
+        // parent-times-local is the intended multiplication order.
+        child.world = zm.mul(top.world, child.local);
+        recalculate_scene_graph(child);
+    }
+}
+
+/// Program entry point. Opens the window, loads the shader, cube mesh and
+/// wall texture, builds one polycube per entry of `SomaSolve.STD_SOMA`, then
+/// renders the selected polycube, rotating over time, until the window is
+/// asked to close. Exits with status 1 if the window could not be created.
+pub fn main() void {
+    // NOTE(review): `width` is an assumed field name; only `height` is
+    // referenced elsewhere in this part of the file.
+    var window_dims = WindowDims{ .width = 800, .height = 600 };
+    // `main` returns void, so a failed start-up is reported via the exit status.
+    const window = init_window_and_gl(&window_dims) orelse std.process.exit(1);
+
+    app_state = GlobalAppState{
+        .current_polycube = 0,
+        // Differs from `current_polycube`, so the first frame runs the
+        // hide/show swap in the render loop.
+        .last_polycube_visible = 6,
+        .active_shader = null,
+        .polycubes = ArrayList(Polycube).init(std.heap.page_allocator),
+    };
+
+    var phong_shader = Shader{};
+    phong_shader.init("../assets/shaders/phong-solid.vertex.glsl", "../assets/shaders/phong-solid.fragment.glsl");
+    app_state.active_shader = &phong_shader;
+
+    cube_mesh.init("../assets/models/c000000.obj");
+    wall_tex.init("../assets/textures/brick-wall.jpg");
+
+    // Only `big_frame` is drawn below; `little_frame` is set up but not
+    // rendered yet.
+    var little_frame = Frame{ .width = 80, .height = 60, .x = 20, .y = 20 };
+    var big_frame = Frame{ .width = 800, .height = 600, .x = 0, .y = 0 };
+    var main_cam = Camera{};
+    var other_cam = Camera{};
+    little_frame.init(&other_cam);
+    big_frame.init(&main_cam);
+
+    var root_node = SceneGraphNode{};
+    root_node.init();
+
+    // NOTE(review): the loop bound treats `STD_SOMA` as a list (`.items.len`)
+    // while the body indexes it directly, and the `voxel.Space` literal is
+    // positional. Both are left as written because the declarations are not
+    // visible from this part of the file.
+    var i: usize = 0;
+    while (i < SomaSolve.STD_SOMA.items.len) : (i += 1) {
+        var voxel_space = voxel.Space{ SomaSolve.STD_SOMA[i], 3, 3, 3 };
+        voxel.cullEmptySpace(&voxel_space);
+        var polycube = create_polycube_from_repr(&voxel_space);
+        polycube.color = color.color_from_index(i);
+        app_state.polycubes.append(polycube) catch @panic("main: out of memory");
+        root_node.children.append(polycube.graph_node) catch @panic("main: out of memory");
+    }
+
+    main_cam.pos = zm.f32x4(4.0, 4.0, 4.0, 0.0);
+    main_cam.look_at(0.0, 0.0, 0.0);
+
+    // GL reads plain float arrays, so SIMD vectors are flattened before upload.
+    const light_pos = zm.vecToArr3(zm.f32x4(6.0, 6.0, 6.0, 0.0));
+
+    // The active shader is `phong_shader` for the whole run; its uniform
+    // locations are looked up once instead of every frame.
+    const prog_id = phong_shader.prog_id;
+    c.glUseProgram(prog_id);
+    const view_loc = c.glGetUniformLocation(prog_id, "view");
+    const proj_loc = c.glGetUniformLocation(prog_id, "projection");
+    const light_pos_loc = c.glGetUniformLocation(prog_id, "light_pos");
+    const model_uniform_loc = c.glGetUniformLocation(prog_id, "model");
+    const solid_color_loc = c.glGetUniformLocation(prog_id, "solid_color");
+    c.glUniform3fv(light_pos_loc, 1, &light_pos);
+    // NOTE(review): assumes `proj` / `view` are stored in a layout GL can read
+    // directly; if they are zmath matrices they need `zm.matToArr` as well.
+    c.glUniformMatrix4fv(proj_loc, 1, c.GL_FALSE, &main_cam.proj);
+    c.glUniformMatrix4fv(view_loc, 1, c.GL_FALSE, &main_cam.view);
+
+    var last_frame = c.glfwGetTime();
+    var time_delta: f64 = 1.0 / 60.0;
+    // glfwWindowShouldClose returns a C int flag, not a bool.
+    while (c.glfwWindowShouldClose(window) == c.GLFW_FALSE) {
+        const now = c.glfwGetTime();
+        time_delta = now - last_frame;
+        last_frame = now;
+        process_input(window);
+
+        if (app_state.last_polycube_visible != app_state.current_polycube) {
+            app_state.polycubes.items[app_state.last_polycube_visible].hide();
+            app_state.polycubes.items[app_state.current_polycube].show();
+            app_state.last_polycube_visible = app_state.current_polycube;
+        }
+
+        c.glClearColor(0.0, 0.0, 0.0, 1.0);
+        c.glClear(c.GL_DEPTH_BUFFER_BIT | c.GL_COLOR_BUFFER_BIT);
+
+        gl_update_viewport(&window_dims, &big_frame);
+        const current_polycube = &app_state.polycubes.items[app_state.current_polycube];
+        // Rotates the selected polycube at half a radian per second.
+        get_scene_graph_node(current_polycube.graph_node).rotation =
+            zm.quatFromRollPitchYaw(0.0, std.math.lossyCast(f32, now / 2.0), 0.0);
+
+        c.glBindVertexArray(cube_mesh.vao);
+        //glBindTexture(GL_TEXTURE_2D, entity.tex->tex_id);
+        recalculate_scene_graph(&root_node);
+        const solid_color = zm.vecToArr3(current_polycube.color);
+        c.glUniform3fv(solid_color_loc, 1, &solid_color);
+        for (entities.items) |entity| {
+            if (!entity.visible) continue;
+            const world = zm.matToArr(get_scene_graph_node(entity.scene_graph_node).world);
+            c.glUniformMatrix4fv(model_uniform_loc, 1, c.GL_FALSE, &world);
+            c.glDrawArrays(c.GL_TRIANGLES, 0, entity.mesh.num_indices);
+            //glDrawElements(GL_TRIANGLES, entity->mesh->num_indices, GL_UNSIGNED_INT, 0);
+        }
+
+        c.glfwSwapBuffers(window);
+        c.glfwPollEvents();
+    }
+
+    c.glfwTerminate();
+}
+
+//test "simple test" {
+//    var list = std.ArrayList(i32).init(std.testing.allocator);
+//    defer list.deinit(); // try commenting this out and see if zig detects the memory leak!
+//    try list.append(42);
+//    try std.testing.expectEqual(@as(i32, 42), list.pop());
+//}
diff --git a/vendor/loaders/stb_image.cpp b/vendor/loaders/stb_image.cpp
deleted file mode 100644
index 8ddfd1f..0000000
--- a/vendor/loaders/stb_image.cpp
+++ /dev/null
@@ -1,2 +0,0 @@
-#define STB_IMAGE_IMPLEMENTATION
-#include "stb_image.h"
diff --git a/vendor/loaders/tinyobj.cpp b/vendor/loaders/tinyobj.cpp
deleted file mode 100644
index bded088..0000000
--- a/vendor/loaders/tinyobj.cpp
+++ /dev/null
@@ -1,2 +0,0 @@
-#define TINYOBJLOADER_IMPLEMENTATION
-#include "tinyobj.h"
diff --git a/vendor/loaders/tinyobj.h b/vendor/loaders/tinyobj.h
deleted file mode 100644
index 3d86b90..0000000
--- a/vendor/loaders/tinyobj.h
+++ /dev/null
@@ -1,3455 +0,0 @@
-/*
-The MIT License (MIT)
-
-Copyright (c) 2012-Present, Syoyo Fujita and many contributors.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-//
-// version 2.0.0 : Add new object oriented API. 1.x API is still provided.
-//                 * Support line primitive.
-//                 * Support points primitive.
-//                 * Support multiple search path for .mtl(v1 API).
-//                 * Support vertex weight `vw`(as an tinyobj extension)
-//                 * Support escaped whitespece in mtllib
-//                 * Add robust triangulation using Mapbox earcut(TINYOBJLOADER_USE_MAPBOX_EARCUT).
-// version 1.4.0 : Modifed ParseTextureNameAndOption API
-// version 1.3.1 : Make ParseTextureNameAndOption API public
-// version 1.3.0 : Separate warning and error message(breaking API of LoadObj)
-// version 1.2.3 : Added color space extension('-colorspace') to tex opts.
-// version 1.2.2 : Parse multiple group names.
-// version 1.2.1 : Added initial support for line('l') primitive(PR #178)
-// version 1.2.0 : Hardened implementation(#175)
-// version 1.1.1 : Support smoothing groups(#162)
-// version 1.1.0 : Support parsing vertex color(#144)
-// version 1.0.8 : Fix parsing `g` tag just after `usemtl`(#138)
-// version 1.0.7 : Support multiple tex options(#126)
-// version 1.0.6 : Add TINYOBJLOADER_USE_DOUBLE option(#124)
-// version 1.0.5 : Ignore `Tr` when `d` exists in MTL(#43)
-// version 1.0.4 : Support multiple filenames for 'mtllib'(#112)
-// version 1.0.3 : Support parsing texture options(#85)
-// version 1.0.2 : Improve parsing speed by about a factor of 2 for large
-// files(#105)
-// version 1.0.1 : Fixes a shape is lost if obj ends with a 'usemtl'(#104)
-// version 1.0.0 : Change data structure. Change license from BSD to MIT.
-//
-
-//
-// Use this in *one* .cc
-//   #define TINYOBJLOADER_IMPLEMENTATION
-//   #include "tiny_obj_loader.h"
-//
-
-#ifndef TINY_OBJ_LOADER_H_
-#define TINY_OBJ_LOADER_H_
-
-#include <map>
-#include <string>
-#include <vector>
-
-namespace tinyobj {
-
-// TODO(syoyo): Better C++11 detection for older compiler
-#if __cplusplus > 199711L
-#define TINYOBJ_OVERRIDE override
-#else
-#define TINYOBJ_OVERRIDE
-#endif
-
-#ifdef __clang__
-#pragma clang diagnostic push
-#if __has_warning("-Wzero-as-null-pointer-constant")
-#pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant"
-#endif
-
-#pragma clang diagnostic ignored "-Wpadded"
-
-#endif
-
-// https://en.wikipedia.org/wiki/Wavefront_.obj_file says ...
-//
-//  -blendu on | off                       # set horizontal texture blending
-//  (default on)
-//  -blendv on | off                       # set vertical texture blending
-//  (default on)
-//  -boost real_value                      # boost mip-map sharpness
-//  -mm base_value gain_value              # modify texture map values (default
-//  0 1)
-//                                         #     base_value = brightness,
-//                                         gain_value = contrast
-//  -o u [v [w]]                           # Origin offset             (default
-//  0 0 0)
-//  -s u [v [w]]                           # Scale                     (default
-//  1 1 1)
-//  -t u [v [w]]                           # Turbulence                (default
-//  0 0 0)
-//  -texres resolution                     # texture resolution to create
-//  -clamp on | off                        # only render texels in the clamped
-//  0-1 range (default off)
-//                                         #   When unclamped, textures are
-//                                         repeated across a surface,
-//                                         #   when clamped, only texels which
-//                                         fall within the 0-1
-//                                         #   range are rendered.
-//  -bm mult_value                         # bump multiplier (for bump maps
-//  only)
-//
-//  -imfchan r | g | b | m | l | z         # specifies which channel of the file
-//  is used to
-//                                         # create a scalar or bump texture.
-//                                         r:red, g:green,
-//                                         # b:blue, m:matte, l:luminance,
-//                                         z:z-depth..
-//                                         # (the default for bump is 'l' and
-//                                         for decal is 'm')
-//  bump -imfchan r bumpmap.tga            # says to use the red channel of
-//  bumpmap.tga as the bumpmap
-//
-// For reflection maps...
-//
-//   -type sphere                           # specifies a sphere for a "refl"
-//   reflection map
-//   -type cube_top    | cube_bottom |      # when using a cube map, the texture
-//   file for each
-//         cube_front  | cube_back   |      # side of the cube is specified
-//         separately
-//         cube_left   | cube_right
-//
-// TinyObjLoader extension.
-//
-//   -colorspace SPACE                      # Color space of the texture. e.g.
-//   'sRGB` or 'linear'
-//
-
-#ifdef TINYOBJLOADER_USE_DOUBLE
-//#pragma message "using double"
-typedef double real_t;
-#else
-//#pragma message "using float"
-typedef float real_t;
-#endif
-
-typedef enum {
-  TEXTURE_TYPE_NONE,  // default
-  TEXTURE_TYPE_SPHERE,
-  TEXTURE_TYPE_CUBE_TOP,
-  TEXTURE_TYPE_CUBE_BOTTOM,
-  TEXTURE_TYPE_CUBE_FRONT,
-  TEXTURE_TYPE_CUBE_BACK,
-  TEXTURE_TYPE_CUBE_LEFT,
-  TEXTURE_TYPE_CUBE_RIGHT
-} texture_type_t;
-
-struct texture_option_t {
-  texture_type_t type;      // -type (default TEXTURE_TYPE_NONE)
-  real_t sharpness;         // -boost (default 1.0?)
-  real_t brightness;        // base_value in -mm option (default 0)
-  real_t contrast;          // gain_value in -mm option (default 1)
-  real_t origin_offset[3];  // -o u [v [w]] (default 0 0 0)
-  real_t scale[3];          // -s u [v [w]] (default 1 1 1)
-  real_t turbulence[3];     // -t u [v [w]] (default 0 0 0)
-  int texture_resolution;   // -texres resolution (No default value in the spec.
-                            // We'll use -1)
-  bool clamp;               // -clamp (default false)
-  char imfchan;  // -imfchan (the default for bump is 'l' and for decal is 'm')
-  bool blendu;   // -blendu (default on)
-  bool blendv;   // -blendv (default on)
-  real_t bump_multiplier;  // -bm (for bump maps only, default 1.0)
-
-  // extension
-  std::string colorspace;  // Explicitly specify color space of stored texel
-                           // value. Usually `sRGB` or `linear` (default empty).
-};
-
-struct material_t {
-  std::string name;
-
-  real_t ambient[3];
-  real_t diffuse[3];
-  real_t specular[3];
-  real_t transmittance[3];
-  real_t emission[3];
-  real_t shininess;
-  real_t ior;       // index of refraction
-  real_t dissolve;  // 1 == opaque; 0 == fully transparent
-  // illumination model (see http://www.fileformat.info/format/material/)
-  int illum;
-
-  int dummy;  // Suppress padding warning.
-
-  std::string ambient_texname;             // map_Ka
-  std::string diffuse_texname;             // map_Kd
-  std::string specular_texname;            // map_Ks
-  std::string specular_highlight_texname;  // map_Ns
-  std::string bump_texname;                // map_bump, map_Bump, bump
-  std::string displacement_texname;        // disp
-  std::string alpha_texname;               // map_d
-  std::string reflection_texname;          // refl
-
-  texture_option_t ambient_texopt;
-  texture_option_t diffuse_texopt;
-  texture_option_t specular_texopt;
-  texture_option_t specular_highlight_texopt;
-  texture_option_t bump_texopt;
-  texture_option_t displacement_texopt;
-  texture_option_t alpha_texopt;
-  texture_option_t reflection_texopt;
-
-  // PBR extension
-  // http://exocortex.com/blog/extending_wavefront_mtl_to_support_pbr
-  real_t roughness;            // [0, 1] default 0
-  real_t metallic;             // [0, 1] default 0
-  real_t sheen;                // [0, 1] default 0
-  real_t clearcoat_thickness;  // [0, 1] default 0
-  real_t clearcoat_roughness;  // [0, 1] default 0
-  real_t anisotropy;           // aniso. [0, 1] default 0
-  real_t anisotropy_rotation;  // anisor. [0, 1] default 0
-  real_t pad0;
-  std::string roughness_texname;  // map_Pr
-  std::string metallic_texname;   // map_Pm
-  std::string sheen_texname;      // map_Ps
-  std::string emissive_texname;   // map_Ke
-  std::string normal_texname;     // norm. For normal mapping.
-
-  texture_option_t roughness_texopt;
-  texture_option_t metallic_texopt;
-  texture_option_t sheen_texopt;
-  texture_option_t emissive_texopt;
-  texture_option_t normal_texopt;
-
-  int pad2;
-
-  std::map<std::string, std::string> unknown_parameter;
-
-#ifdef TINY_OBJ_LOADER_PYTHON_BINDING
-  // For pybind11
-  std::array<double, 3> GetDiffuse() {
-    std::array<double, 3> values;
-    values[0] = double(diffuse[0]);
-    values[1] = double(diffuse[1]);
-    values[2] = double(diffuse[2]);
-
-    return values;
-  }
-
-  std::array<double, 3> GetSpecular() {
-    std::array<double, 3> values;
-    values[0] = double(specular[0]);
-    values[1] = double(specular[1]);
-    values[2] = double(specular[2]);
-
-    return values;
-  }
-
-  std::array<double, 3> GetTransmittance() {
-    std::array<double, 3> values;
-    values[0] = double(transmittance[0]);
-    values[1] = double(transmittance[1]);
-    values[2] = double(transmittance[2]);
-
-    return values;
-  }
-
-  std::array<double, 3> GetEmission() {
-    std::array<double, 3> values;
-    values[0] = double(emission[0]);
-    values[1] = double(emission[1]);
-    values[2] = double(emission[2]);
-
-    return values;
-  }
-
-  std::array<double, 3> GetAmbient() {
-    std::array<double, 3> values;
-    values[0] = double(ambient[0]);
-    values[1] = double(ambient[1]);
-    values[2] = double(ambient[2]);
-
-    return values;
-  }
-
-  void SetDiffuse(std::array<double, 3> &a) {
-    diffuse[0] = real_t(a[0]);
-    diffuse[1] = real_t(a[1]);
-    diffuse[2] = real_t(a[2]);
-  }
-
-  void SetAmbient(std::array<double, 3> &a) {
-    ambient[0] = real_t(a[0]);
-    ambient[1] = real_t(a[1]);
-    ambient[2] = real_t(a[2]);
-  }
-
-  void SetSpecular(std::array<double, 3> &a) {
-    specular[0] = real_t(a[0]);
-    specular[1] = real_t(a[1]);
-    specular[2] = real_t(a[2]);
-  }
-
-  void SetTransmittance(std::array<double, 3> &a) {
-    transmittance[0] = real_t(a[0]);
-    transmittance[1] = real_t(a[1]);
-    transmittance[2] = real_t(a[2]);
-  }
-
-  std::string GetCustomParameter(const std::string &key) {
-    std::map<std::string, std::string>::const_iterator it =
-        unknown_parameter.find(key);
-
-    if (it != unknown_parameter.end()) {
-      return it->second;
-    }
-    return std::string();
-  }
-
-#endif
-};
-
-struct tag_t {
-  std::string name;
-
-  std::vector<int> intValues;
-  std::vector<real_t> floatValues;
-  std::vector<std::string> stringValues;
-};
-
-struct joint_and_weight_t {
-  int joint_id;
-  real_t weight;
-};
-
-struct skin_weight_t {
-  int vertex_id;  // Corresponding vertex index in `attrib_t::vertices`.
-                  // Compared to `index_t`, this index must be positive and
-                  // start with 0(does not allow relative indexing)
-  std::vector<joint_and_weight_t> weightValues;
-};
-
-// Index struct to support different indices for vtx/normal/texcoord.
-// -1 means not used.
-struct index_t {
-  int vertex_index;
-  int normal_index;
-  int texcoord_index;
-};
-
-struct mesh_t {
-  std::vector<index_t> indices;
-  std::vector<unsigned char>
-      num_face_vertices;          // The number of vertices per
-                                  // face. 3 = triangle, 4 = quad,
-                                  // ... Up to 255 vertices per face.
-  std::vector<int> material_ids;  // per-face material ID
-  std::vector<unsigned int> smoothing_group_ids;  // per-face smoothing group
-                                                  // ID(0 = off. positive value
-                                                  // = group id)
-  std::vector<tag_t> tags;                        // SubD tag
-};
-
-// struct path_t {
-//  std::vector<int> indices;  // pairs of indices for lines
-//};
-
-struct lines_t {
-  // Linear flattened indices.
-  std::vector<index_t> indices;        // indices for vertices(poly lines)
-  std::vector<int> num_line_vertices;  // The number of vertices per line.
-};
-
-struct points_t {
-  std::vector<index_t> indices;  // indices for points
-};
-
-struct shape_t {
-  std::string name;
-  mesh_t mesh;
-  lines_t lines;
-  points_t points;
-};
-
-// Vertex attributes
-struct attrib_t {
-  std::vector<real_t> vertices;  // 'v'(xyz)
-
-  // For backward compatibility, we store vertex weight in separate array.
-  std::vector<real_t> vertex_weights;  // 'v'(w)
-  std::vector<real_t> normals;         // 'vn'
-  std::vector<real_t> texcoords;       // 'vt'(uv)
-
-  // For backward compatibility, we store texture coordinate 'w' in separate
-  // array.
-  std::vector<real_t> texcoord_ws;  // 'vt'(w)
-  std::vector<real_t> colors;       // extension: vertex colors
-
-  //
-  // TinyObj extension.
-  //
-
-  // NOTE(syoyo): array index is based on the appearance order.
-  // To get a corresponding skin weight for a specific vertex id `vid`,
-  // Need to reconstruct a look up table: `skin_weight_t::vertex_id` == `vid`
-  // (e.g. using std::map, std::unordered_map)
-  std::vector<skin_weight_t> skin_weights;
-
-  attrib_t() {}
-
-  //
-  // For pybind11
-  //
-  const std::vector<real_t> &GetVertices() const { return vertices; }
-
-  const std::vector<real_t> &GetVertexWeights() const { return vertex_weights; }
-};
-
-struct callback_t {
-  // W is optional and set to 1 if there is no `w` item in `v` line
-  void (*vertex_cb)(void *user_data, real_t x, real_t y, real_t z, real_t w);
-  void (*vertex_color_cb)(void *user_data, real_t x, real_t y, real_t z,
-                          real_t r, real_t g, real_t b, bool has_color);
-  void (*normal_cb)(void *user_data, real_t x, real_t y, real_t z);
-
-  // y and z are optional and set to 0 if there is no `y` and/or `z` item(s) in
-  // `vt` line.
-  void (*texcoord_cb)(void *user_data, real_t x, real_t y, real_t z);
-
-  // called per 'f' line. num_indices is the number of face indices(e.g. 3 for
-  // triangle, 4 for quad)
-  // 0 will be passed for undefined index in index_t members.
-  void (*index_cb)(void *user_data, index_t *indices, int num_indices);
-  // `name` material name, `material_id` = the array index of material_t[]. -1
-  // if
-  // a material not found in .mtl
-  void (*usemtl_cb)(void *user_data, const char *name, int material_id);
-  // `materials` = parsed material data.
-  void (*mtllib_cb)(void *user_data, const material_t *materials,
-                    int num_materials);
-  // There may be multiple group names
-  void (*group_cb)(void *user_data, const char **names, int num_names);
-  void (*object_cb)(void *user_data, const char *name);
-
-  callback_t()
-      : vertex_cb(NULL),
-        normal_cb(NULL),
-        texcoord_cb(NULL),
-        index_cb(NULL),
-        usemtl_cb(NULL),
-        mtllib_cb(NULL),
-        group_cb(NULL),
-        object_cb(NULL) {}
-};
-
-class MaterialReader {
- public:
-  MaterialReader() {}
-  virtual ~MaterialReader();
-
-  virtual bool operator()(const std::string &matId,
-                          std::vector<material_t> *materials,
-                          std::map<std::string, int> *matMap, std::string *warn,
-                          std::string *err) = 0;
-};
-
-///
-/// Read .mtl from a file.
-///
-class MaterialFileReader : public MaterialReader {
- public:
-  // Path could contain separator(';' in Windows, ':' in Posix)
-  explicit MaterialFileReader(const std::string &mtl_basedir)
-      : m_mtlBaseDir(mtl_basedir) {}
-  virtual ~MaterialFileReader() TINYOBJ_OVERRIDE {}
-  virtual bool operator()(const std::string &matId,
-                          std::vector<material_t> *materials,
-                          std::map<std::string, int> *matMap, std::string *warn,
-                          std::string *err) TINYOBJ_OVERRIDE;
-
- private:
-  std::string m_mtlBaseDir;
-};
-
-///
-/// Read .mtl from a stream.
-///
-class MaterialStreamReader : public MaterialReader {
- public:
-  explicit MaterialStreamReader(std::istream &inStream)
-      : m_inStream(inStream) {}
-  virtual ~MaterialStreamReader() TINYOBJ_OVERRIDE {}
-  virtual bool operator()(const std::string &matId,
-                          std::vector<material_t> *materials,
-                          std::map<std::string, int> *matMap, std::string *warn,
-                          std::string *err) TINYOBJ_OVERRIDE;
-
- private:
-  std::istream &m_inStream;
-};
-
-// v2 API
-struct ObjReaderConfig {
-  bool triangulate;  // triangulate polygon?
-
-  // Currently not used.
-  // "simple" or empty: Create triangle fan
-  // "earcut": Use the algorithm based on Ear clipping
-  std::string triangulation_method;
-
-  /// Parse vertex color.
-  /// If vertex color is not present, its filled with default value.
-  /// false = no vertex color
-  /// This will increase memory of parsed .obj
-  bool vertex_color;
-
-  ///
-  /// Search path to .mtl file.
-  /// Default = "" = search from the same directory of .obj file.
-  /// Valid only when loading .obj from a file.
-  ///
-  std::string mtl_search_path;
-
-  ObjReaderConfig()
-      : triangulate(true), triangulation_method("simple"), vertex_color(true) {}
-};
-
-///
-/// Wavefront .obj reader class(v2 API)
-///
-class ObjReader {
- public:
-  ObjReader() : valid_(false) {}
-
-  ///
-  /// Load .obj and .mtl from a file.
-  ///
-  /// @param[in] filename wavefront .obj filename
-  /// @param[in] config Reader configuration
-  ///
-  bool ParseFromFile(const std::string &filename,
-                     const ObjReaderConfig &config = ObjReaderConfig());
-
-  ///
-  /// Parse .obj from a text string.
-  /// Need to supply .mtl text string by `mtl_text`.
-  /// This function ignores `mtllib` line in .obj text.
-  ///
-  /// @param[in] obj_text wavefront .obj filename
-  /// @param[in] mtl_text wavefront .mtl filename
-  /// @param[in] config Reader configuration
-  ///
-  bool ParseFromString(const std::string &obj_text, const std::string &mtl_text,
-                       const ObjReaderConfig &config = ObjReaderConfig());
-
-  ///
-  /// .obj was loaded or parsed correctly.
-  ///
-  bool Valid() const { return valid_; }
-
-  const attrib_t &GetAttrib() const { return attrib_; }
-
-  const std::vector<shape_t> &GetShapes() const { return shapes_; }
-
-  const std::vector<material_t> &GetMaterials() const { return materials_; }
-
-  ///
-  /// Warning message(may be filled after `Load` or `Parse`)
-  ///
-  const std::string &Warning() const { return warning_; }
-
-  ///
-  /// Error message(filled when `Load` or `Parse` failed)
-  ///
-  const std::string &Error() const { return error_; }
-
- private:
-  bool valid_;
-
-  attrib_t attrib_;
-  std::vector<shape_t> shapes_;
-  std::vector<material_t> materials_;
-
-  std::string warning_;
-  std::string error_;
-};
-
-/// ==>>========= Legacy v1 API =============================================
-
-/// Loads .obj from a file.
-/// 'attrib', 'shapes' and 'materials' will be filled with parsed shape data
-/// 'shapes' will be filled with parsed shape data
-/// Returns true when loading .obj become success.
-/// Returns warning message into `warn`, and error message into `err`
-/// 'mtl_basedir' is optional, and used for base directory for .mtl file.
-/// In default(`NULL'), .mtl file is searched from an application's working
-/// directory.
-/// 'triangulate' is optional, and used whether triangulate polygon face in .obj
-/// or not.
-/// Option 'default_vcols_fallback' specifies whether vertex colors should
-/// always be defined, even if no colors are given (fallback to white).
-bool LoadObj(attrib_t *attrib, std::vector<shape_t> *shapes,
-             std::vector<material_t> *materials, std::string *warn,
-             std::string *err, const char *filename,
-             const char *mtl_basedir = NULL, bool triangulate = true,
-             bool default_vcols_fallback = true);
-
-/// Loads .obj from a file with custom user callback.
-/// .mtl is loaded as usual and parsed material_t data will be passed to
-/// `callback.mtllib_cb`.
-/// Returns true when loading .obj/.mtl become success.
-/// Returns warning message into `warn`, and error message into `err`
-/// See `examples/callback_api/` for how to use this function.
-bool LoadObjWithCallback(std::istream &inStream, const callback_t &callback,
-                         void *user_data = NULL,
-                         MaterialReader *readMatFn = NULL,
-                         std::string *warn = NULL, std::string *err = NULL);
-
-/// Loads object from a std::istream, uses `readMatFn` to retrieve
-/// std::istream for materials.
-/// Returns true when loading .obj become success.
-/// Returns warning and error message into `err`
-bool LoadObj(attrib_t *attrib, std::vector<shape_t> *shapes,
-             std::vector<material_t> *materials, std::string *warn,
-             std::string *err, std::istream *inStream,
-             MaterialReader *readMatFn = NULL, bool triangulate = true,
-             bool default_vcols_fallback = true);
-
-/// Loads materials into std::map
-void LoadMtl(std::map<std::string, int> *material_map,
-             std::vector<material_t> *materials, std::istream *inStream,
-             std::string *warning, std::string *err);
-
-///
-/// Parse texture name and texture option for custom texture parameter through
-/// material::unknown_parameter
-///
-/// @param[out] texname Parsed texture name
-/// @param[out] texopt Parsed texopt
-/// @param[in] linebuf Input string
-///
-bool ParseTextureNameAndOption(std::string *texname, texture_option_t *texopt,
-                               const char *linebuf);
-
-/// =<<========== Legacy v1 API =============================================
-
-}  // namespace tinyobj
-
-#endif  // TINY_OBJ_LOADER_H_
-
-#ifdef TINYOBJLOADER_IMPLEMENTATION
-#include <cassert>
-#include <cctype>
-#include <cmath>
-#include <cstddef>
-#include <cstdlib>
-#include <cstring>
-#include <fstream>
-#include <limits>
-#include <set>
-#include <sstream>
-#include <utility>
-
-#ifdef TINYOBJLOADER_USE_MAPBOX_EARCUT
-
-#ifdef TINYOBJLOADER_DONOT_INCLUDE_MAPBOX_EARCUT
-// Assume earcut.hpp is included outside of tiny_obj_loader.h
-#else
-
-#ifdef __clang__
-#pragma clang diagnostic push
-#pragma clang diagnostic ignored "-Weverything"
-#endif
-
-#include <array>
-#include "mapbox/earcut.hpp"
-
-#ifdef __clang__
-#pragma clang diagnostic pop
-#endif
-
-#endif
-
-#endif  // TINYOBJLOADER_USE_MAPBOX_EARCUT
-
-namespace tinyobj {
-
-MaterialReader::~MaterialReader() {}
-
-struct vertex_index_t {
-  int v_idx, vt_idx, vn_idx;
-  vertex_index_t() : v_idx(-1), vt_idx(-1), vn_idx(-1) {}
-  explicit vertex_index_t(int idx) : v_idx(idx), vt_idx(idx), vn_idx(idx) {}
-  vertex_index_t(int vidx, int vtidx, int vnidx)
-      : v_idx(vidx), vt_idx(vtidx), vn_idx(vnidx) {}
-};
-
-// Internal data structure for face representation
-// index + smoothing group.
-struct face_t {
-  unsigned int
-      smoothing_group_id;  // smoothing group id. 0 = smoothing groupd is off.
-  int pad_;
-  std::vector<vertex_index_t> vertex_indices;  // face vertex indices.
-
-  face_t() : smoothing_group_id(0), pad_(0) {}
-};
-
-// Internal data structure for line representation
-struct __line_t {
-  // l v1/vt1 v2/vt2 ...
-  // In the specification, line primitrive does not have normal index, but
-  // TinyObjLoader allow it
-  std::vector<vertex_index_t> vertex_indices;
-};
-
-// Internal data structure for points representation
-struct __points_t {
-  // p v1 v2 ...
-  // In the specification, point primitrive does not have normal index and
-  // texture coord index, but TinyObjLoader allow it.
-  std::vector<vertex_index_t> vertex_indices;
-};
-
-struct tag_sizes {
-  tag_sizes() : num_ints(0), num_reals(0), num_strings(0) {}
-  int num_ints;
-  int num_reals;
-  int num_strings;
-};
-
-struct obj_shape {
-  std::vector<real_t> v;
-  std::vector<real_t> vn;
-  std::vector<real_t> vt;
-};
-
-//
-// Manages group of primitives(face, line, points, ...)
-struct PrimGroup {
-  std::vector<face_t> faceGroup;
-  std::vector<__line_t> lineGroup;
-  std::vector<__points_t> pointsGroup;
-
-  void clear() {
-    faceGroup.clear();
-    lineGroup.clear();
-    pointsGroup.clear();
-  }
-
-  bool IsEmpty() const {
-    return faceGroup.empty() && lineGroup.empty() && pointsGroup.empty();
-  }
-
-  // TODO(syoyo): bspline, surface, ...
-};
-
-// See
-// http://stackoverflow.com/questions/6089231/getting-std-ifstream-to-handle-lf-cr-and-crlf
-static std::istream &safeGetline(std::istream &is, std::string &t) {
-  t.clear();
-
-  // The characters in the stream are read one-by-one using a std::streambuf.
-  // That is faster than reading them one-by-one using the std::istream.
-  // Code that uses streambuf this way must be guarded by a sentry object.
-  // The sentry object performs various tasks,
-  // such as thread synchronization and updating the stream state.
-
-  std::istream::sentry se(is, true);
-  std::streambuf *sb = is.rdbuf();
-
-  if (se) {
-    for (;;) {
-      int c = sb->sbumpc();
-      switch (c) {
-        case '\n':
-          return is;
-        case '\r':
-          if (sb->sgetc() == '\n') sb->sbumpc();
-          return is;
-        case EOF:
-          // Also handle the case when the last line has no line ending
-          if (t.empty()) is.setstate(std::ios::eofbit);
-          return is;
-        default:
-          t += static_cast<char>(c);
-      }
-    }
-  }
-
-  return is;
-}
-
-#define IS_SPACE(x) (((x) == ' ') || ((x) == '\t'))
-#define IS_DIGIT(x) \
-  (static_cast<unsigned int>((x) - '0') < static_cast<unsigned int>(10))
-#define IS_NEW_LINE(x) (((x) == '\r') || ((x) == '\n') || ((x) == '\0'))
-
-template <typename T>
-static inline std::string toString(const T &t) {
-  std::stringstream ss;
-  ss << t;
-  return ss.str();
-}
-
-struct warning_context
-{
-	std::string *warn;
-	size_t line_number;
-};
-
-// Make index zero-base, and also support relative index.
-static inline bool fixIndex(int idx, int n, int *ret, bool allow_zero, const warning_context &context) {
-  if (!ret) {
-    return false;
-  }
-
-  if (idx > 0) {
-    (*ret) = idx - 1;
-    return true;
-  }
-
-  if (idx == 0) {
-    // zero is not allowed according to the spec.
-    if (context.warn) {
-      (*context.warn) += "A zero value index found (will have a value of -1 for normal and tex indices. Line "
-          + toString(context.line_number) + ").\n";
-    }
-
-    (*ret) = idx - 1;
-    return allow_zero;
-  }
-
-  if (idx < 0) {
-    (*ret) = n + idx;  // negative value = relative
-    return true;
-  }
-
-  return false;  // never reach here.
-}
-
-static inline std::string parseString(const char **token) {
-  std::string s;
-  (*token) += strspn((*token), " \t");
-  size_t e = strcspn((*token), " \t\r");
-  s = std::string((*token), &(*token)[e]);
-  (*token) += e;
-  return s;
-}
-
-static inline int parseInt(const char **token) {
-  (*token) += strspn((*token), " \t");
-  int i = atoi((*token));
-  (*token) += strcspn((*token), " \t\r");
-  return i;
-}
-
-// Tries to parse a floating point number located at s.
-//
-// s_end should be a location in the string where reading should absolutely
-// stop. For example at the end of the string, to prevent buffer overflows.
-//
-// Parses the following EBNF grammar:
-//   sign    = "+" | "-" ;
-//   END     = ? anything not in digit ?
-//   digit   = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" ;
-//   integer = [sign] , digit , {digit} ;
-//   decimal = integer , ["." , integer] ;
-//   float   = ( decimal , END ) | ( decimal , ("E" | "e") , integer , END ) ;
-//
-//  Valid strings are for example:
-//   -0  +3.1417e+2  -0.0E-3  1.0324  -1.41   11e2
-//
-// If the parsing is a success, result is set to the parsed value and true
-// is returned.
-//
-// The function is greedy and will parse until any of the following happens:
-//  - a non-conforming character is encountered.
-//  - s_end is reached.
-//
-// The following situations triggers a failure:
-//  - s >= s_end.
-//  - parse failure.
-//
-static bool tryParseDouble(const char *s, const char *s_end, double *result) {
-  if (s >= s_end) {
-    return false;
-  }
-
-  double mantissa = 0.0;
-  // This exponent is base 2 rather than 10.
-  // However the exponent we parse is supposed to be one of ten,
-  // thus we must take care to convert the exponent/and or the
-  // mantissa to a * 2^E, where a is the mantissa and E is the
-  // exponent.
-  // To get the final double we will use ldexp, it requires the
-  // exponent to be in base 2.
-  int exponent = 0;
-
-  // NOTE: THESE MUST BE DECLARED HERE SINCE WE ARE NOT ALLOWED
-  // TO JUMP OVER DEFINITIONS.
-  char sign = '+';
-  char exp_sign = '+';
-  char const *curr = s;
-
-  // How many characters were read in a loop.
-  int read = 0;
-  // Tells whether a loop terminated due to reaching s_end.
-  bool end_not_reached = false;
-  bool leading_decimal_dots = false;
-
-  /*
-          BEGIN PARSING.
-  */
-
-  // Find out what sign we've got.
-  if (*curr == '+' || *curr == '-') {
-    sign = *curr;
-    curr++;
-    if ((curr != s_end) && (*curr == '.')) {
-      // accept. Somethig like `.7e+2`, `-.5234`
-      leading_decimal_dots = true;
-    }
-  } else if (IS_DIGIT(*curr)) { /* Pass through. */
-  } else if (*curr == '.') {
-    // accept. Somethig like `.7e+2`, `-.5234`
-    leading_decimal_dots = true;
-  } else {
-    goto fail;
-  }
-
-  // Read the integer part.
-  end_not_reached = (curr != s_end);
-  if (!leading_decimal_dots) {
-    while (end_not_reached && IS_DIGIT(*curr)) {
-      mantissa *= 10;
-      mantissa += static_cast<int>(*curr - 0x30);
-      curr++;
-      read++;
-      end_not_reached = (curr != s_end);
-    }
-
-    // We must make sure we actually got something.
-    if (read == 0) goto fail;
-  }
-
-  // We allow numbers of form "#", "###" etc.
-  if (!end_not_reached) goto assemble;
-
-  // Read the decimal part.
-  if (*curr == '.') {
-    curr++;
-    read = 1;
-    end_not_reached = (curr != s_end);
-    while (end_not_reached && IS_DIGIT(*curr)) {
-      static const double pow_lut[] = {
-          1.0, 0.1, 0.01, 0.001, 0.0001, 0.00001, 0.000001, 0.0000001,
-      };
-      const int lut_entries = sizeof pow_lut / sizeof pow_lut[0];
-
-      // NOTE: Don't use powf here, it will absolutely murder precision.
-      mantissa += static_cast<int>(*curr - 0x30) *
-                  (read < lut_entries ? pow_lut[read] : std::pow(10.0, -read));
-      read++;
-      curr++;
-      end_not_reached = (curr != s_end);
-    }
-  } else if (*curr == 'e' || *curr == 'E') {
-  } else {
-    goto assemble;
-  }
-
-  if (!end_not_reached) goto assemble;
-
-  // Read the exponent part.
-  if (*curr == 'e' || *curr == 'E') {
-    curr++;
-    // Figure out if a sign is present and if it is.
-    end_not_reached = (curr != s_end);
-    if (end_not_reached && (*curr == '+' || *curr == '-')) {
-      exp_sign = *curr;
-      curr++;
-    } else if (IS_DIGIT(*curr)) { /* Pass through. */
-    } else {
-      // Empty E is not allowed.
-      goto fail;
-    }
-
-    read = 0;
-    end_not_reached = (curr != s_end);
-    while (end_not_reached && IS_DIGIT(*curr)) {
-      // To avoid annoying MSVC's min/max macro definiton,
-      // Use hardcoded int max value
-      if (exponent > (2147483647/10)) { // 2147483647 = std::numeric_limits<int>::max()
-        // Integer overflow
-        goto fail;
-      }
-      exponent *= 10;
-      exponent += static_cast<int>(*curr - 0x30);
-      curr++;
-      read++;
-      end_not_reached = (curr != s_end);
-    }
-    exponent *= (exp_sign == '+' ? 1 : -1);
-    if (read == 0) goto fail;
-  }
-
-assemble:
-  *result = (sign == '+' ? 1 : -1) *
-            (exponent ? std::ldexp(mantissa * std::pow(5.0, exponent), exponent)
-                      : mantissa);
-  return true;
-fail:
-  return false;
-}
-
-static inline real_t parseReal(const char **token, double default_value = 0.0) {
-  (*token) += strspn((*token), " \t");
-  const char *end = (*token) + strcspn((*token), " \t\r");
-  double val = default_value;
-  tryParseDouble((*token), end, &val);
-  real_t f = static_cast<real_t>(val);
-  (*token) = end;
-  return f;
-}
-
-static inline bool parseReal(const char **token, real_t *out) {
-  (*token) += strspn((*token), " \t");
-  const char *end = (*token) + strcspn((*token), " \t\r");
-  double val;
-  bool ret = tryParseDouble((*token), end, &val);
-  if (ret) {
-    real_t f = static_cast<real_t>(val);
-    (*out) = f;
-  }
-  (*token) = end;
-  return ret;
-}
-
-static inline void parseReal2(real_t *x, real_t *y, const char **token,
-                              const double default_x = 0.0,
-                              const double default_y = 0.0) {
-  (*x) = parseReal(token, default_x);
-  (*y) = parseReal(token, default_y);
-}
-
-static inline void parseReal3(real_t *x, real_t *y, real_t *z,
-                              const char **token, const double default_x = 0.0,
-                              const double default_y = 0.0,
-                              const double default_z = 0.0) {
-  (*x) = parseReal(token, default_x);
-  (*y) = parseReal(token, default_y);
-  (*z) = parseReal(token, default_z);
-}
-
-static inline void parseV(real_t *x, real_t *y, real_t *z, real_t *w,
-                          const char **token, const double default_x = 0.0,
-                          const double default_y = 0.0,
-                          const double default_z = 0.0,
-                          const double default_w = 1.0) {
-  (*x) = parseReal(token, default_x);
-  (*y) = parseReal(token, default_y);
-  (*z) = parseReal(token, default_z);
-  (*w) = parseReal(token, default_w);
-}
-
-// Extension: parse vertex with colors(6 items)
-static inline bool parseVertexWithColor(real_t *x, real_t *y, real_t *z,
-                                        real_t *r, real_t *g, real_t *b,
-                                        const char **token,
-                                        const double default_x = 0.0,
-                                        const double default_y = 0.0,
-                                        const double default_z = 0.0) {
-  (*x) = parseReal(token, default_x);
-  (*y) = parseReal(token, default_y);
-  (*z) = parseReal(token, default_z);
-
-  const bool found_color =
-      parseReal(token, r) && parseReal(token, g) && parseReal(token, b);
-
-  if (!found_color) {
-    (*r) = (*g) = (*b) = 1.0;
-  }
-
-  return found_color;
-}
-
-static inline bool parseOnOff(const char **token, bool default_value = true) {
-  (*token) += strspn((*token), " \t");
-  const char *end = (*token) + strcspn((*token), " \t\r");
-
-  bool ret = default_value;
-  if ((0 == strncmp((*token), "on", 2))) {
-    ret = true;
-  } else if ((0 == strncmp((*token), "off", 3))) {
-    ret = false;
-  }
-
-  (*token) = end;
-  return ret;
-}
-
-static inline texture_type_t parseTextureType(
-    const char **token, texture_type_t default_value = TEXTURE_TYPE_NONE) {
-  (*token) += strspn((*token), " \t");
-  const char *end = (*token) + strcspn((*token), " \t\r");
-  texture_type_t ty = default_value;
-
-  if ((0 == strncmp((*token), "cube_top", strlen("cube_top")))) {
-    ty = TEXTURE_TYPE_CUBE_TOP;
-  } else if ((0 == strncmp((*token), "cube_bottom", strlen("cube_bottom")))) {
-    ty = TEXTURE_TYPE_CUBE_BOTTOM;
-  } else if ((0 == strncmp((*token), "cube_left", strlen("cube_left")))) {
-    ty = TEXTURE_TYPE_CUBE_LEFT;
-  } else if ((0 == strncmp((*token), "cube_right", strlen("cube_right")))) {
-    ty = TEXTURE_TYPE_CUBE_RIGHT;
-  } else if ((0 == strncmp((*token), "cube_front", strlen("cube_front")))) {
-    ty = TEXTURE_TYPE_CUBE_FRONT;
-  } else if ((0 == strncmp((*token), "cube_back", strlen("cube_back")))) {
-    ty = TEXTURE_TYPE_CUBE_BACK;
-  } else if ((0 == strncmp((*token), "sphere", strlen("sphere")))) {
-    ty = TEXTURE_TYPE_SPHERE;
-  }
-
-  (*token) = end;
-  return ty;
-}
-
-static tag_sizes parseTagTriple(const char **token) {
-  tag_sizes ts;
-
-  (*token) += strspn((*token), " \t");
-  ts.num_ints = atoi((*token));
-  (*token) += strcspn((*token), "/ \t\r");
-  if ((*token)[0] != '/') {
-    return ts;
-  }
-
-  (*token)++;  // Skip '/'
-
-  (*token) += strspn((*token), " \t");
-  ts.num_reals = atoi((*token));
-  (*token) += strcspn((*token), "/ \t\r");
-  if ((*token)[0] != '/') {
-    return ts;
-  }
-  (*token)++;  // Skip '/'
-
-  ts.num_strings = parseInt(token);
-
-  return ts;
-}
-
-// Parse triples with index offsets: i, i/j/k, i//k, i/j
-static bool parseTriple(const char **token, int vsize, int vnsize, int vtsize,
-                        vertex_index_t *ret, const warning_context &context) {
-  if (!ret) {
-    return false;
-  }
-
-  vertex_index_t vi(-1);
-
-  if (!fixIndex(atoi((*token)), vsize, &vi.v_idx, false, context)) {
-    return false;
-  }
-
-  (*token) += strcspn((*token), "/ \t\r");
-  if ((*token)[0] != '/') {
-    (*ret) = vi;
-    return true;
-  }
-  (*token)++;
-
-  // i//k
-  if ((*token)[0] == '/') {
-    (*token)++;
-    if (!fixIndex(atoi((*token)), vnsize, &vi.vn_idx, true, context)) {
-      return false;
-    }
-    (*token) += strcspn((*token), "/ \t\r");
-    (*ret) = vi;
-    return true;
-  }
-
-  // i/j/k or i/j
-  if (!fixIndex(atoi((*token)), vtsize, &vi.vt_idx, true, context)) {
-    return false;
-  }
-
-  (*token) += strcspn((*token), "/ \t\r");
-  if ((*token)[0] != '/') {
-    (*ret) = vi;
-    return true;
-  }
-
-  // i/j/k
-  (*token)++;  // skip '/'
-  if (!fixIndex(atoi((*token)), vnsize, &vi.vn_idx, true, context)) {
-    return false;
-  }
-  (*token) += strcspn((*token), "/ \t\r");
-
-  (*ret) = vi;
-
-  return true;
-}
-
-// Parse raw triples: i, i/j/k, i//k, i/j
-static vertex_index_t parseRawTriple(const char **token) {
-  vertex_index_t vi(static_cast<int>(0));  // 0 is an invalid index in OBJ
-
-  vi.v_idx = atoi((*token));
-  (*token) += strcspn((*token), "/ \t\r");
-  if ((*token)[0] != '/') {
-    return vi;
-  }
-  (*token)++;
-
-  // i//k
-  if ((*token)[0] == '/') {
-    (*token)++;
-    vi.vn_idx = atoi((*token));
-    (*token) += strcspn((*token), "/ \t\r");
-    return vi;
-  }
-
-  // i/j/k or i/j
-  vi.vt_idx = atoi((*token));
-  (*token) += strcspn((*token), "/ \t\r");
-  if ((*token)[0] != '/') {
-    return vi;
-  }
-
-  // i/j/k
-  (*token)++;  // skip '/'
-  vi.vn_idx = atoi((*token));
-  (*token) += strcspn((*token), "/ \t\r");
-  return vi;
-}
-
-bool ParseTextureNameAndOption(std::string *texname, texture_option_t *texopt,
-                               const char *linebuf) {
-  // @todo { write more robust lexer and parser. }
-  bool found_texname = false;
-  std::string texture_name;
-
-  const char *token = linebuf;  // Assume line ends with NULL
-
-  while (!IS_NEW_LINE((*token))) {
-    token += strspn(token, " \t");  // skip space
-    if ((0 == strncmp(token, "-blendu", 7)) && IS_SPACE((token[7]))) {
-      token += 8;
-      texopt->blendu = parseOnOff(&token, /* default */ true);
-    } else if ((0 == strncmp(token, "-blendv", 7)) && IS_SPACE((token[7]))) {
-      token += 8;
-      texopt->blendv = parseOnOff(&token, /* default */ true);
-    } else if ((0 == strncmp(token, "-clamp", 6)) && IS_SPACE((token[6]))) {
-      token += 7;
-      texopt->clamp = parseOnOff(&token, /* default */ true);
-    } else if ((0 == strncmp(token, "-boost", 6)) && IS_SPACE((token[6]))) {
-      token += 7;
-      texopt->sharpness = parseReal(&token, 1.0);
-    } else if ((0 == strncmp(token, "-bm", 3)) && IS_SPACE((token[3]))) {
-      token += 4;
-      texopt->bump_multiplier = parseReal(&token, 1.0);
-    } else if ((0 == strncmp(token, "-o", 2)) && IS_SPACE((token[2]))) {
-      token += 3;
-      parseReal3(&(texopt->origin_offset[0]), &(texopt->origin_offset[1]),
-                 &(texopt->origin_offset[2]), &token);
-    } else if ((0 == strncmp(token, "-s", 2)) && IS_SPACE((token[2]))) {
-      token += 3;
-      parseReal3(&(texopt->scale[0]), &(texopt->scale[1]), &(texopt->scale[2]),
-                 &token, 1.0, 1.0, 1.0);
-    } else if ((0 == strncmp(token, "-t", 2)) && IS_SPACE((token[2]))) {
-      token += 3;
-      parseReal3(&(texopt->turbulence[0]), &(texopt->turbulence[1]),
-                 &(texopt->turbulence[2]), &token);
-    } else if ((0 == strncmp(token, "-type", 5)) && IS_SPACE((token[5]))) {
-      token += 5;
-      texopt->type = parseTextureType((&token), TEXTURE_TYPE_NONE);
-    } else if ((0 == strncmp(token, "-texres", 7)) && IS_SPACE((token[7]))) {
-      token += 7;
-      // TODO(syoyo): Check if arg is int type.
-      texopt->texture_resolution = parseInt(&token);
-    } else if ((0 == strncmp(token, "-imfchan", 8)) && IS_SPACE((token[8]))) {
-      token += 9;
-      token += strspn(token, " \t");
-      const char *end = token + strcspn(token, " \t\r");
-      if ((end - token) == 1) {  // Assume one char for -imfchan
-        texopt->imfchan = (*token);
-      }
-      token = end;
-    } else if ((0 == strncmp(token, "-mm", 3)) && IS_SPACE((token[3]))) {
-      token += 4;
-      parseReal2(&(texopt->brightness), &(texopt->contrast), &token, 0.0, 1.0);
-    } else if ((0 == strncmp(token, "-colorspace", 11)) &&
-               IS_SPACE((token[11]))) {
-      token += 12;
-      texopt->colorspace = parseString(&token);
-    } else {
-// Assume texture filename
-#if 0
-      size_t len = strcspn(token, " \t\r");  // untile next space
-      texture_name = std::string(token, token + len);
-      token += len;
-
-      token += strspn(token, " \t");  // skip space
-#else
-      // Read filename until line end to parse filename containing whitespace
-      // TODO(syoyo): Support parsing texture option flag after the filename.
-      texture_name = std::string(token);
-      token += texture_name.length();
-#endif
-
-      found_texname = true;
-    }
-  }
-
-  if (found_texname) {
-    (*texname) = texture_name;
-    return true;
-  } else {
-    return false;
-  }
-}
-
-static void InitTexOpt(texture_option_t *texopt, const bool is_bump) {
-  if (is_bump) {
-    texopt->imfchan = 'l';
-  } else {
-    texopt->imfchan = 'm';
-  }
-  texopt->bump_multiplier = static_cast<real_t>(1.0);
-  texopt->clamp = false;
-  texopt->blendu = true;
-  texopt->blendv = true;
-  texopt->sharpness = static_cast<real_t>(1.0);
-  texopt->brightness = static_cast<real_t>(0.0);
-  texopt->contrast = static_cast<real_t>(1.0);
-  texopt->origin_offset[0] = static_cast<real_t>(0.0);
-  texopt->origin_offset[1] = static_cast<real_t>(0.0);
-  texopt->origin_offset[2] = static_cast<real_t>(0.0);
-  texopt->scale[0] = static_cast<real_t>(1.0);
-  texopt->scale[1] = static_cast<real_t>(1.0);
-  texopt->scale[2] = static_cast<real_t>(1.0);
-  texopt->turbulence[0] = static_cast<real_t>(0.0);
-  texopt->turbulence[1] = static_cast<real_t>(0.0);
-  texopt->turbulence[2] = static_cast<real_t>(0.0);
-  texopt->texture_resolution = -1;
-  texopt->type = TEXTURE_TYPE_NONE;
-}
-
-static void InitMaterial(material_t *material) {
-  InitTexOpt(&material->ambient_texopt, /* is_bump */ false);
-  InitTexOpt(&material->diffuse_texopt, /* is_bump */ false);
-  InitTexOpt(&material->specular_texopt, /* is_bump */ false);
-  InitTexOpt(&material->specular_highlight_texopt, /* is_bump */ false);
-  InitTexOpt(&material->bump_texopt, /* is_bump */ true);
-  InitTexOpt(&material->displacement_texopt, /* is_bump */ false);
-  InitTexOpt(&material->alpha_texopt, /* is_bump */ false);
-  InitTexOpt(&material->reflection_texopt, /* is_bump */ false);
-  InitTexOpt(&material->roughness_texopt, /* is_bump */ false);
-  InitTexOpt(&material->metallic_texopt, /* is_bump */ false);
-  InitTexOpt(&material->sheen_texopt, /* is_bump */ false);
-  InitTexOpt(&material->emissive_texopt, /* is_bump */ false);
-  InitTexOpt(&material->normal_texopt,
-             /* is_bump */ false);  // @fixme { is_bump will be true? }
-  material->name = "";
-  material->ambient_texname = "";
-  material->diffuse_texname = "";
-  material->specular_texname = "";
-  material->specular_highlight_texname = "";
-  material->bump_texname = "";
-  material->displacement_texname = "";
-  material->reflection_texname = "";
-  material->alpha_texname = "";
-  for (int i = 0; i < 3; i++) {
-    material->ambient[i] = static_cast<real_t>(0.0);
-    material->diffuse[i] = static_cast<real_t>(0.0);
-    material->specular[i] = static_cast<real_t>(0.0);
-    material->transmittance[i] = static_cast<real_t>(0.0);
-    material->emission[i] = static_cast<real_t>(0.0);
-  }
-  material->illum = 0;
-  material->dissolve = static_cast<real_t>(1.0);
-  material->shininess = static_cast<real_t>(1.0);
-  material->ior = static_cast<real_t>(1.0);
-
-  material->roughness = static_cast<real_t>(0.0);
-  material->metallic = static_cast<real_t>(0.0);
-  material->sheen = static_cast<real_t>(0.0);
-  material->clearcoat_thickness = static_cast<real_t>(0.0);
-  material->clearcoat_roughness = static_cast<real_t>(0.0);
-  material->anisotropy_rotation = static_cast<real_t>(0.0);
-  material->anisotropy = static_cast<real_t>(0.0);
-  material->roughness_texname = "";
-  material->metallic_texname = "";
-  material->sheen_texname = "";
-  material->emissive_texname = "";
-  material->normal_texname = "";
-
-  material->unknown_parameter.clear();
-}
-
-// code from https://wrf.ecse.rpi.edu//Research/Short_Notes/pnpoly.html
-template <typename T>
-static int pnpoly(int nvert, T *vertx, T *verty, T testx, T testy) {
-  int i, j, c = 0;
-  for (i = 0, j = nvert - 1; i < nvert; j = i++) {
-    if (((verty[i] > testy) != (verty[j] > testy)) &&
-        (testx <
-         (vertx[j] - vertx[i]) * (testy - verty[i]) / (verty[j] - verty[i]) +
-             vertx[i]))
-      c = !c;
-  }
-  return c;
-}
-
-struct TinyObjPoint {
-  real_t x, y, z;
-  TinyObjPoint() : x(0), y(0), z(0) {}
-  TinyObjPoint(real_t x_, real_t y_, real_t z_) :
-    x(x_), y(y_), z(z_) {}
-};
-
-inline TinyObjPoint cross(const TinyObjPoint &v1, const TinyObjPoint &v2) {
-  return TinyObjPoint(v1.y * v2.z - v1.z * v2.y,
-                      v1.z * v2.x - v1.x * v2.z,
-                      v1.x * v2.y - v1.y * v2.x);
-}
-
-inline real_t dot(const TinyObjPoint &v1, const TinyObjPoint &v2) {
-  return (v1.x * v2.x + v1.y * v2.y + v1.z * v2.z);
-}
-
-inline real_t GetLength(TinyObjPoint &e) {
-	return std::sqrt(e.x*e.x + e.y*e.y + e.z*e.z);
-}
-
-inline TinyObjPoint Normalize(TinyObjPoint e) {
-	real_t inv_length = real_t(1) / GetLength(e);
-	return TinyObjPoint(e.x * inv_length, e.y * inv_length, e.z * inv_length );
-}
-
-
-inline TinyObjPoint WorldToLocal(const TinyObjPoint& a,
-										  const TinyObjPoint& u,
-										  const TinyObjPoint& v,
-										  const TinyObjPoint& w) {
-  return TinyObjPoint(dot(a,u),dot(a,v),dot(a,w));
-}
-
-
-// TODO(syoyo): refactor function.
-static bool exportGroupsToShape(shape_t *shape, const PrimGroup &prim_group,
-                                const std::vector<tag_t> &tags,
-                                const int material_id, const std::string &name,
-                                bool triangulate, const std::vector<real_t> &v,
-                                std::string *warn) {
-  if (prim_group.IsEmpty()) {
-    return false;
-  }
-
-  shape->name = name;
-
-  // polygon
-  if (!prim_group.faceGroup.empty()) {
-    // Flatten vertices and indices
-    for (size_t i = 0; i < prim_group.faceGroup.size(); i++) {
-      const face_t &face = prim_group.faceGroup[i];
-
-      size_t npolys = face.vertex_indices.size();
-
-      if (npolys < 3) {
-        // Face must have 3+ vertices.
-        if (warn) {
-          (*warn) += "Degenerated face found\n.";
-        }
-        continue;
-      }
-
-      if (triangulate && npolys != 3) {
-        if (npolys == 4) {
-          vertex_index_t i0 = face.vertex_indices[0];
-          vertex_index_t i1 = face.vertex_indices[1];
-          vertex_index_t i2 = face.vertex_indices[2];
-          vertex_index_t i3 = face.vertex_indices[3];
-
-          size_t vi0 = size_t(i0.v_idx);
-          size_t vi1 = size_t(i1.v_idx);
-          size_t vi2 = size_t(i2.v_idx);
-          size_t vi3 = size_t(i3.v_idx);
-
-          if (((3 * vi0 + 2) >= v.size()) || ((3 * vi1 + 2) >= v.size()) ||
-              ((3 * vi2 + 2) >= v.size()) || ((3 * vi3 + 2) >= v.size())) {
-            // Invalid triangle.
-            // FIXME(syoyo): Is it ok to simply skip this invalid triangle?
-            if (warn) {
-              (*warn) += "Face with invalid vertex index found.\n";
-            }
-            continue;
-          }
-
-          real_t v0x = v[vi0 * 3 + 0];
-          real_t v0y = v[vi0 * 3 + 1];
-          real_t v0z = v[vi0 * 3 + 2];
-          real_t v1x = v[vi1 * 3 + 0];
-          real_t v1y = v[vi1 * 3 + 1];
-          real_t v1z = v[vi1 * 3 + 2];
-          real_t v2x = v[vi2 * 3 + 0];
-          real_t v2y = v[vi2 * 3 + 1];
-          real_t v2z = v[vi2 * 3 + 2];
-          real_t v3x = v[vi3 * 3 + 0];
-          real_t v3y = v[vi3 * 3 + 1];
-          real_t v3z = v[vi3 * 3 + 2];
-
-          // There are two candidates to split the quad into two triangles.
-          //
-          // Choose the shortest edge.
-          // TODO: Is it better to determine the edge to split by calculating
-          // the area of each triangle?
-          //
-          // +---+
-          // |\  |
-          // | \ |
-          // |  \|
-          // +---+
-          //
-          // +---+
-          // |  /|
-          // | / |
-          // |/  |
-          // +---+
-
-          real_t e02x = v2x - v0x;
-          real_t e02y = v2y - v0y;
-          real_t e02z = v2z - v0z;
-          real_t e13x = v3x - v1x;
-          real_t e13y = v3y - v1y;
-          real_t e13z = v3z - v1z;
-
-          real_t sqr02 = e02x * e02x + e02y * e02y + e02z * e02z;
-          real_t sqr13 = e13x * e13x + e13y * e13y + e13z * e13z;
-
-          index_t idx0, idx1, idx2, idx3;
-
-          idx0.vertex_index = i0.v_idx;
-          idx0.normal_index = i0.vn_idx;
-          idx0.texcoord_index = i0.vt_idx;
-          idx1.vertex_index = i1.v_idx;
-          idx1.normal_index = i1.vn_idx;
-          idx1.texcoord_index = i1.vt_idx;
-          idx2.vertex_index = i2.v_idx;
-          idx2.normal_index = i2.vn_idx;
-          idx2.texcoord_index = i2.vt_idx;
-          idx3.vertex_index = i3.v_idx;
-          idx3.normal_index = i3.vn_idx;
-          idx3.texcoord_index = i3.vt_idx;
-
-          if (sqr02 < sqr13) {
-            // [0, 1, 2], [0, 2, 3]
-            shape->mesh.indices.push_back(idx0);
-            shape->mesh.indices.push_back(idx1);
-            shape->mesh.indices.push_back(idx2);
-
-            shape->mesh.indices.push_back(idx0);
-            shape->mesh.indices.push_back(idx2);
-            shape->mesh.indices.push_back(idx3);
-          } else {
-            // [0, 1, 3], [1, 2, 3]
-            shape->mesh.indices.push_back(idx0);
-            shape->mesh.indices.push_back(idx1);
-            shape->mesh.indices.push_back(idx3);
-
-            shape->mesh.indices.push_back(idx1);
-            shape->mesh.indices.push_back(idx2);
-            shape->mesh.indices.push_back(idx3);
-          }
-
-          // Two triangle faces
-          shape->mesh.num_face_vertices.push_back(3);
-          shape->mesh.num_face_vertices.push_back(3);
-
-          shape->mesh.material_ids.push_back(material_id);
-          shape->mesh.material_ids.push_back(material_id);
-
-          shape->mesh.smoothing_group_ids.push_back(face.smoothing_group_id);
-          shape->mesh.smoothing_group_ids.push_back(face.smoothing_group_id);
-
-        } else {
-#ifdef TINYOBJLOADER_USE_MAPBOX_EARCUT
-          vertex_index_t i0 = face.vertex_indices[0];
-          vertex_index_t i0_2 = i0;
-
-          // TMW change: Find the normal axis of the polygon using Newell's method
-          TinyObjPoint n;
-          for (size_t k = 0; k < npolys; ++k) {
-            i0 = face.vertex_indices[k % npolys];
-            size_t vi0 = size_t(i0.v_idx);
-
-            size_t j = (k + 1) % npolys;
-            i0_2 = face.vertex_indices[j];
-            size_t vi0_2 = size_t(i0_2.v_idx);
-
-            real_t v0x = v[vi0 * 3 + 0];
-            real_t v0y = v[vi0 * 3 + 1];
-            real_t v0z = v[vi0 * 3 + 2];
-
-            real_t v0x_2 = v[vi0_2 * 3 + 0];
-            real_t v0y_2 = v[vi0_2 * 3 + 1];
-            real_t v0z_2 = v[vi0_2 * 3 + 2];
-
-            const TinyObjPoint point1(v0x,v0y,v0z);
-            const TinyObjPoint point2(v0x_2,v0y_2,v0z_2);
-
-            TinyObjPoint a(point1.x - point2.x, point1.y - point2.y, point1.z - point2.z);
-            TinyObjPoint b(point1.x + point2.x, point1.y + point2.y, point1.z + point2.z);
-
-            n.x += (a.y * b.z);
-            n.y += (a.z * b.x);
-            n.z += (a.x * b.y);
-          }
-          real_t length_n = GetLength(n);
-          //Check if zero length normal
-          if(length_n <= 0) {
-            continue;
-          }
-          //Negative is to flip the normal to the correct direction
-          real_t inv_length = -real_t(1.0) / length_n;
-          n.x *= inv_length;
-          n.y *= inv_length;
-          n.z *= inv_length;
-
-          TinyObjPoint axis_w, axis_v, axis_u;
-          axis_w = n;
-          TinyObjPoint a;
-          if(std::abs(axis_w.x) > real_t(0.9999999)) {
-            a = TinyObjPoint(0,1,0);
-          } else {
-            a = TinyObjPoint(1,0,0);
-          }
-          axis_v = Normalize(cross(axis_w, a));
-          axis_u = cross(axis_w, axis_v);
-          using Point = std::array<real_t, 2>;
-
-          // first polyline define the main polygon.
-          // following polylines define holes(not used in tinyobj).
-          std::vector<std::vector<Point> > polygon;
-
-          std::vector<Point> polyline;
-
-          //TMW change: Find best normal and project v0x and v0y to those coordinates, instead of
-          //picking a plane aligned with an axis (which can flip polygons).
-
-          // Fill polygon data(facevarying vertices).
-          for (size_t k = 0; k < npolys; k++) {
-            i0 = face.vertex_indices[k];
-            size_t vi0 = size_t(i0.v_idx);
-
-            assert(((3 * vi0 + 2) < v.size()));
-
-            real_t v0x = v[vi0 * 3 + 0];
-            real_t v0y = v[vi0 * 3 + 1];
-            real_t v0z = v[vi0 * 3 + 2];
-
-            TinyObjPoint polypoint(v0x,v0y,v0z);
-            TinyObjPoint loc = WorldToLocal(polypoint, axis_u, axis_v, axis_w);
-
-            polyline.push_back({loc.x, loc.y});
-          }
-
-          polygon.push_back(polyline);
-          std::vector<uint32_t> indices = mapbox::earcut<uint32_t>(polygon);
-          // => result = 3 * faces, clockwise
-
-          assert(indices.size() % 3 == 0);
-
-          // Reconstruct vertex_index_t
-          for (size_t k = 0; k < indices.size() / 3; k++) {
-            {
-              index_t idx0, idx1, idx2;
-              idx0.vertex_index = face.vertex_indices[indices[3 * k + 0]].v_idx;
-              idx0.normal_index =
-                face.vertex_indices[indices[3 * k + 0]].vn_idx;
-              idx0.texcoord_index =
-                face.vertex_indices[indices[3 * k + 0]].vt_idx;
-              idx1.vertex_index = face.vertex_indices[indices[3 * k + 1]].v_idx;
-              idx1.normal_index =
-                face.vertex_indices[indices[3 * k + 1]].vn_idx;
-              idx1.texcoord_index =
-                face.vertex_indices[indices[3 * k + 1]].vt_idx;
-              idx2.vertex_index = face.vertex_indices[indices[3 * k + 2]].v_idx;
-              idx2.normal_index =
-                face.vertex_indices[indices[3 * k + 2]].vn_idx;
-              idx2.texcoord_index =
-                face.vertex_indices[indices[3 * k + 2]].vt_idx;
-
-              shape->mesh.indices.push_back(idx0);
-              shape->mesh.indices.push_back(idx1);
-              shape->mesh.indices.push_back(idx2);
-
-              shape->mesh.num_face_vertices.push_back(3);
-              shape->mesh.material_ids.push_back(material_id);
-              shape->mesh.smoothing_group_ids.push_back(
-                  face.smoothing_group_id);
-            }
-          }
-
-#else  // Built-in ear clipping triangulation
-          vertex_index_t i0 = face.vertex_indices[0];
-          vertex_index_t i1(-1);
-          vertex_index_t i2 = face.vertex_indices[1];
-
-          // find the two axes to work in
-          size_t axes[2] = {1, 2};
-          for (size_t k = 0; k < npolys; ++k) {
-            i0 = face.vertex_indices[(k + 0) % npolys];
-            i1 = face.vertex_indices[(k + 1) % npolys];
-            i2 = face.vertex_indices[(k + 2) % npolys];
-            size_t vi0 = size_t(i0.v_idx);
-            size_t vi1 = size_t(i1.v_idx);
-            size_t vi2 = size_t(i2.v_idx);
-
-            if (((3 * vi0 + 2) >= v.size()) || ((3 * vi1 + 2) >= v.size()) ||
-                ((3 * vi2 + 2) >= v.size())) {
-              // Invalid triangle.
-              // FIXME(syoyo): Is it ok to simply skip this invalid triangle?
-              continue;
-            }
-            real_t v0x = v[vi0 * 3 + 0];
-            real_t v0y = v[vi0 * 3 + 1];
-            real_t v0z = v[vi0 * 3 + 2];
-            real_t v1x = v[vi1 * 3 + 0];
-            real_t v1y = v[vi1 * 3 + 1];
-            real_t v1z = v[vi1 * 3 + 2];
-            real_t v2x = v[vi2 * 3 + 0];
-            real_t v2y = v[vi2 * 3 + 1];
-            real_t v2z = v[vi2 * 3 + 2];
-            real_t e0x = v1x - v0x;
-            real_t e0y = v1y - v0y;
-            real_t e0z = v1z - v0z;
-            real_t e1x = v2x - v1x;
-            real_t e1y = v2y - v1y;
-            real_t e1z = v2z - v1z;
-            real_t cx = std::fabs(e0y * e1z - e0z * e1y);
-            real_t cy = std::fabs(e0z * e1x - e0x * e1z);
-            real_t cz = std::fabs(e0x * e1y - e0y * e1x);
-            const real_t epsilon = std::numeric_limits<real_t>::epsilon();
-            // std::cout << "cx " << cx << ", cy " << cy << ", cz " << cz <<
-            // "\n";
-            if (cx > epsilon || cy > epsilon || cz > epsilon) {
-              // std::cout << "corner\n";
-              // found a corner
-              if (cx > cy && cx > cz) {
-                // std::cout << "pattern0\n";
-              } else {
-                // std::cout << "axes[0] = 0\n";
-                axes[0] = 0;
-                if (cz > cx && cz > cy) {
-                  // std::cout << "axes[1] = 1\n";
-                  axes[1] = 1;
-                }
-              }
-              break;
-            }
-          }
-
-          face_t remainingFace = face;  // copy
-          size_t guess_vert = 0;
-          vertex_index_t ind[3];
-          real_t vx[3];
-          real_t vy[3];
-
-          // How many iterations can we do without decreasing the remaining
-          // vertices.
-          size_t remainingIterations = face.vertex_indices.size();
-          size_t previousRemainingVertices =
-              remainingFace.vertex_indices.size();
-
-          while (remainingFace.vertex_indices.size() > 3 &&
-                 remainingIterations > 0) {
-            // std::cout << "remainingIterations " << remainingIterations <<
-            // "\n";
-
-            npolys = remainingFace.vertex_indices.size();
-            if (guess_vert >= npolys) {
-              guess_vert -= npolys;
-            }
-
-            if (previousRemainingVertices != npolys) {
-              // The number of remaining vertices decreased. Reset counters.
-              previousRemainingVertices = npolys;
-              remainingIterations = npolys;
-            } else {
-              // We didn't consume a vertex on previous iteration, reduce the
-              // available iterations.
-              remainingIterations--;
-            }
-
-            for (size_t k = 0; k < 3; k++) {
-              ind[k] = remainingFace.vertex_indices[(guess_vert + k) % npolys];
-              size_t vi = size_t(ind[k].v_idx);
-              if (((vi * 3 + axes[0]) >= v.size()) ||
-                  ((vi * 3 + axes[1]) >= v.size())) {
-                // ???
-                vx[k] = static_cast<real_t>(0.0);
-                vy[k] = static_cast<real_t>(0.0);
-              } else {
-                vx[k] = v[vi * 3 + axes[0]];
-                vy[k] = v[vi * 3 + axes[1]];
-              }
-            }
-
-            //
-            // area is calculated per face
-            //
-            real_t e0x = vx[1] - vx[0];
-            real_t e0y = vy[1] - vy[0];
-            real_t e1x = vx[2] - vx[1];
-            real_t e1y = vy[2] - vy[1];
-            real_t cross = e0x * e1y - e0y * e1x;
-            // std::cout << "axes = " << axes[0] << ", " << axes[1] << "\n";
-            // std::cout << "e0x, e0y, e1x, e1y " << e0x << ", " << e0y << ", "
-            // << e1x << ", " << e1y << "\n";
-
-            real_t area = (vx[0] * vy[1] - vy[0] * vx[1]) * static_cast<real_t>(0.5);
-            // std::cout << "cross " << cross << ", area " << area << "\n";
-            // if an internal angle
-            if (cross * area < static_cast<real_t>(0.0)) {
-              // std::cout << "internal \n";
-              guess_vert += 1;
-              // std::cout << "guess vert : " << guess_vert << "\n";
-              continue;
-            }
-
-            // check all other verts in case they are inside this triangle
-            bool overlap = false;
-            for (size_t otherVert = 3; otherVert < npolys; ++otherVert) {
-              size_t idx = (guess_vert + otherVert) % npolys;
-
-              if (idx >= remainingFace.vertex_indices.size()) {
-                // std::cout << "???0\n";
-                // ???
-                continue;
-              }
-
-              size_t ovi = size_t(remainingFace.vertex_indices[idx].v_idx);
-
-              if (((ovi * 3 + axes[0]) >= v.size()) ||
-                  ((ovi * 3 + axes[1]) >= v.size())) {
-                // std::cout << "???1\n";
-                // ???
-                continue;
-              }
-              real_t tx = v[ovi * 3 + axes[0]];
-              real_t ty = v[ovi * 3 + axes[1]];
-              if (pnpoly(3, vx, vy, tx, ty)) {
-                // std::cout << "overlap\n";
-                overlap = true;
-                break;
-              }
-            }
-
-            if (overlap) {
-              // std::cout << "overlap2\n";
-              guess_vert += 1;
-              continue;
-            }
-
-            // this triangle is an ear
-            {
-              index_t idx0, idx1, idx2;
-              idx0.vertex_index = ind[0].v_idx;
-              idx0.normal_index = ind[0].vn_idx;
-              idx0.texcoord_index = ind[0].vt_idx;
-              idx1.vertex_index = ind[1].v_idx;
-              idx1.normal_index = ind[1].vn_idx;
-              idx1.texcoord_index = ind[1].vt_idx;
-              idx2.vertex_index = ind[2].v_idx;
-              idx2.normal_index = ind[2].vn_idx;
-              idx2.texcoord_index = ind[2].vt_idx;
-
-              shape->mesh.indices.push_back(idx0);
-              shape->mesh.indices.push_back(idx1);
-              shape->mesh.indices.push_back(idx2);
-
-              shape->mesh.num_face_vertices.push_back(3);
-              shape->mesh.material_ids.push_back(material_id);
-              shape->mesh.smoothing_group_ids.push_back(
-                  face.smoothing_group_id);
-            }
-
-            // remove v1 from the list
-            size_t removed_vert_index = (guess_vert + 1) % npolys;
-            while (removed_vert_index + 1 < npolys) {
-              remainingFace.vertex_indices[removed_vert_index] =
-                  remainingFace.vertex_indices[removed_vert_index + 1];
-              removed_vert_index += 1;
-            }
-            remainingFace.vertex_indices.pop_back();
-          }
-
-          // std::cout << "remainingFace.vi.size = " <<
-          // remainingFace.vertex_indices.size() << "\n";
-          if (remainingFace.vertex_indices.size() == 3) {
-            i0 = remainingFace.vertex_indices[0];
-            i1 = remainingFace.vertex_indices[1];
-            i2 = remainingFace.vertex_indices[2];
-            {
-              index_t idx0, idx1, idx2;
-              idx0.vertex_index = i0.v_idx;
-              idx0.normal_index = i0.vn_idx;
-              idx0.texcoord_index = i0.vt_idx;
-              idx1.vertex_index = i1.v_idx;
-              idx1.normal_index = i1.vn_idx;
-              idx1.texcoord_index = i1.vt_idx;
-              idx2.vertex_index = i2.v_idx;
-              idx2.normal_index = i2.vn_idx;
-              idx2.texcoord_index = i2.vt_idx;
-
-              shape->mesh.indices.push_back(idx0);
-              shape->mesh.indices.push_back(idx1);
-              shape->mesh.indices.push_back(idx2);
-
-              shape->mesh.num_face_vertices.push_back(3);
-              shape->mesh.material_ids.push_back(material_id);
-              shape->mesh.smoothing_group_ids.push_back(
-                  face.smoothing_group_id);
-            }
-          }
-#endif
-        }  // npolys
-      } else {
-        for (size_t k = 0; k < npolys; k++) {
-          index_t idx;
-          idx.vertex_index = face.vertex_indices[k].v_idx;
-          idx.normal_index = face.vertex_indices[k].vn_idx;
-          idx.texcoord_index = face.vertex_indices[k].vt_idx;
-          shape->mesh.indices.push_back(idx);
-        }
-
-        shape->mesh.num_face_vertices.push_back(
-            static_cast<unsigned char>(npolys));
-        shape->mesh.material_ids.push_back(material_id);  // per face
-        shape->mesh.smoothing_group_ids.push_back(
-            face.smoothing_group_id);  // per face
-      }
-    }
-
-    shape->mesh.tags = tags;
-  }
-
-  // line
-  if (!prim_group.lineGroup.empty()) {
-    // Flatten indices
-    for (size_t i = 0; i < prim_group.lineGroup.size(); i++) {
-      for (size_t j = 0; j < prim_group.lineGroup[i].vertex_indices.size();
-           j++) {
-        const vertex_index_t &vi = prim_group.lineGroup[i].vertex_indices[j];
-
-        index_t idx;
-        idx.vertex_index = vi.v_idx;
-        idx.normal_index = vi.vn_idx;
-        idx.texcoord_index = vi.vt_idx;
-
-        shape->lines.indices.push_back(idx);
-      }
-
-      shape->lines.num_line_vertices.push_back(
-          int(prim_group.lineGroup[i].vertex_indices.size()));
-    }
-  }
-
-  // points
-  if (!prim_group.pointsGroup.empty()) {
-    // Flatten & convert indices
-    for (size_t i = 0; i < prim_group.pointsGroup.size(); i++) {
-      for (size_t j = 0; j < prim_group.pointsGroup[i].vertex_indices.size();
-           j++) {
-        const vertex_index_t &vi = prim_group.pointsGroup[i].vertex_indices[j];
-
-        index_t idx;
-        idx.vertex_index = vi.v_idx;
-        idx.normal_index = vi.vn_idx;
-        idx.texcoord_index = vi.vt_idx;
-
-        shape->points.indices.push_back(idx);
-      }
-    }
-  }
-
-  return true;
-}
-
-// Split a string with specified delimiter character and escape character.
-// https://rosettacode.org/wiki/Tokenize_a_string_with_escaping#C.2B.2B
-static void SplitString(const std::string &s, char delim, char escape,
-                        std::vector<std::string> &elems) {
-  std::string token;
-
-  bool escaping = false;
-  for (size_t i = 0; i < s.size(); ++i) {
-    char ch = s[i];
-    if (escaping) {
-      escaping = false;
-    } else if (ch == escape) {
-      escaping = true;
-      continue;
-    } else if (ch == delim) {
-      if (!token.empty()) {
-        elems.push_back(token);
-      }
-      token.clear();
-      continue;
-    }
-    token += ch;
-  }
-
-  elems.push_back(token);
-}
-
-static std::string JoinPath(const std::string &dir,
-                            const std::string &filename) {
-  if (dir.empty()) {
-    return filename;
-  } else {
-    // check '/'
-    char lastChar = *dir.rbegin();
-    if (lastChar != '/') {
-      return dir + std::string("/") + filename;
-    } else {
-      return dir + filename;
-    }
-  }
-}
-
-void LoadMtl(std::map<std::string, int> *material_map,
-             std::vector<material_t> *materials, std::istream *inStream,
-             std::string *warning, std::string *err) {
-  (void)err;
-
-  // Create a default material anyway.
-  material_t material;
-  InitMaterial(&material);
-
-  // Issue 43. `d` wins against `Tr` since `Tr` is not in the MTL specification.
-  bool has_d = false;
-  bool has_tr = false;
-
-  // has_kd is used to set a default diffuse value when map_Kd is present
-  // and Kd is not.
-  bool has_kd = false;
-
-  std::stringstream warn_ss;
-
-  size_t line_no = 0;
-  std::string linebuf;
-  while (inStream->peek() != -1) {
-    safeGetline(*inStream, linebuf);
-    line_no++;
-
-    // Trim trailing whitespace.
-    if (linebuf.size() > 0) {
-      linebuf = linebuf.substr(0, linebuf.find_last_not_of(" \t") + 1);
-    }
-
-    // Trim newline '\r\n' or '\n'
-    if (linebuf.size() > 0) {
-      if (linebuf[linebuf.size() - 1] == '\n')
-        linebuf.erase(linebuf.size() - 1);
-    }
-    if (linebuf.size() > 0) {
-      if (linebuf[linebuf.size() - 1] == '\r')
-        linebuf.erase(linebuf.size() - 1);
-    }
-
-    // Skip if empty line.
-    if (linebuf.empty()) {
-      continue;
-    }
-
-    // Skip leading space.
-    const char *token = linebuf.c_str();
-    token += strspn(token, " \t");
-
-    assert(token);
-    if (token[0] == '\0') continue;  // empty line
-
-    if (token[0] == '#') continue;  // comment line
-
-    // new mtl
-    if ((0 == strncmp(token, "newmtl", 6)) && IS_SPACE((token[6]))) {
-      // flush previous material.
-      if (!material.name.empty()) {
-        material_map->insert(std::pair<std::string, int>(
-            material.name, static_cast<int>(materials->size())));
-        materials->push_back(material);
-      }
-
-      // initial temporary material
-      InitMaterial(&material);
-
-      has_d = false;
-      has_tr = false;
-
-      // set new mtl name
-      token += 7;
-      {
-        std::string namebuf = parseString(&token);
-        // TODO: empty name check?
-        if (namebuf.empty()) {
-          if (warning) {
-            (*warning) += "empty material name in `newmtl`\n";
-          }
-        }
-        material.name = namebuf;
-      }
-      continue;
-    }
-
-    // ambient
-    if (token[0] == 'K' && token[1] == 'a' && IS_SPACE((token[2]))) {
-      token += 2;
-      real_t r, g, b;
-      parseReal3(&r, &g, &b, &token);
-      material.ambient[0] = r;
-      material.ambient[1] = g;
-      material.ambient[2] = b;
-      continue;
-    }
-
-    // diffuse
-    if (token[0] == 'K' && token[1] == 'd' && IS_SPACE((token[2]))) {
-      token += 2;
-      real_t r, g, b;
-      parseReal3(&r, &g, &b, &token);
-      material.diffuse[0] = r;
-      material.diffuse[1] = g;
-      material.diffuse[2] = b;
-      has_kd = true;
-      continue;
-    }
-
-    // specular
-    if (token[0] == 'K' && token[1] == 's' && IS_SPACE((token[2]))) {
-      token += 2;
-      real_t r, g, b;
-      parseReal3(&r, &g, &b, &token);
-      material.specular[0] = r;
-      material.specular[1] = g;
-      material.specular[2] = b;
-      continue;
-    }
-
-    // transmittance
-    if ((token[0] == 'K' && token[1] == 't' && IS_SPACE((token[2]))) ||
-        (token[0] == 'T' && token[1] == 'f' && IS_SPACE((token[2])))) {
-      token += 2;
-      real_t r, g, b;
-      parseReal3(&r, &g, &b, &token);
-      material.transmittance[0] = r;
-      material.transmittance[1] = g;
-      material.transmittance[2] = b;
-      continue;
-    }
-
-    // ior(index of refraction)
-    if (token[0] == 'N' && token[1] == 'i' && IS_SPACE((token[2]))) {
-      token += 2;
-      material.ior = parseReal(&token);
-      continue;
-    }
-
-    // emission
-    if (token[0] == 'K' && token[1] == 'e' && IS_SPACE(token[2])) {
-      token += 2;
-      real_t r, g, b;
-      parseReal3(&r, &g, &b, &token);
-      material.emission[0] = r;
-      material.emission[1] = g;
-      material.emission[2] = b;
-      continue;
-    }
-
-    // shininess
-    if (token[0] == 'N' && token[1] == 's' && IS_SPACE(token[2])) {
-      token += 2;
-      material.shininess = parseReal(&token);
-      continue;
-    }
-
-    // illum model
-    if (0 == strncmp(token, "illum", 5) && IS_SPACE(token[5])) {
-      token += 6;
-      material.illum = parseInt(&token);
-      continue;
-    }
-
-    // dissolve
-    if ((token[0] == 'd' && IS_SPACE(token[1]))) {
-      token += 1;
-      material.dissolve = parseReal(&token);
-
-      if (has_tr) {
-        warn_ss << "Both `d` and `Tr` parameters defined for \""
-                << material.name
-                << "\". Use the value of `d` for dissolve (line " << line_no
-                << " in .mtl.)\n";
-      }
-      has_d = true;
-      continue;
-    }
-    if (token[0] == 'T' && token[1] == 'r' && IS_SPACE(token[2])) {
-      token += 2;
-      if (has_d) {
-        // `d` wins. Ignore `Tr` value.
-        warn_ss << "Both `d` and `Tr` parameters defined for \""
-                << material.name
-                << "\". Use the value of `d` for dissolve (line " << line_no
-                << " in .mtl.)\n";
-      } else {
-        // We invert value of Tr(assume Tr is in range [0, 1])
-        // NOTE: Interpretation of Tr is application(exporter) dependent. For
-        // some application(e.g. 3ds max obj exporter), Tr = d(Issue 43)
-        material.dissolve = static_cast<real_t>(1.0) - parseReal(&token);
-      }
-      has_tr = true;
-      continue;
-    }
-
-    // PBR: roughness
-    if (token[0] == 'P' && token[1] == 'r' && IS_SPACE(token[2])) {
-      token += 2;
-      material.roughness = parseReal(&token);
-      continue;
-    }
-
-    // PBR: metallic
-    if (token[0] == 'P' && token[1] == 'm' && IS_SPACE(token[2])) {
-      token += 2;
-      material.metallic = parseReal(&token);
-      continue;
-    }
-
-    // PBR: sheen
-    if (token[0] == 'P' && token[1] == 's' && IS_SPACE(token[2])) {
-      token += 2;
-      material.sheen = parseReal(&token);
-      continue;
-    }
-
-    // PBR: clearcoat thickness
-    if (token[0] == 'P' && token[1] == 'c' && IS_SPACE(token[2])) {
-      token += 2;
-      material.clearcoat_thickness = parseReal(&token);
-      continue;
-    }
-
-    // PBR: clearcoat roughness
-    if ((0 == strncmp(token, "Pcr", 3)) && IS_SPACE(token[3])) {
-      token += 4;
-      material.clearcoat_roughness = parseReal(&token);
-      continue;
-    }
-
-    // PBR: anisotropy
-    if ((0 == strncmp(token, "aniso", 5)) && IS_SPACE(token[5])) {
-      token += 6;
-      material.anisotropy = parseReal(&token);
-      continue;
-    }
-
-    // PBR: anisotropy rotation
-    if ((0 == strncmp(token, "anisor", 6)) && IS_SPACE(token[6])) {
-      token += 7;
-      material.anisotropy_rotation = parseReal(&token);
-      continue;
-    }
-
-    // ambient texture
-    if ((0 == strncmp(token, "map_Ka", 6)) && IS_SPACE(token[6])) {
-      token += 7;
-      ParseTextureNameAndOption(&(material.ambient_texname),
-                                &(material.ambient_texopt), token);
-      continue;
-    }
-
-    // diffuse texture
-    if ((0 == strncmp(token, "map_Kd", 6)) && IS_SPACE(token[6])) {
-      token += 7;
-      ParseTextureNameAndOption(&(material.diffuse_texname),
-                                &(material.diffuse_texopt), token);
-
-      // Set a decent diffuse default value if a diffuse texture is specified
-      // without a matching Kd value.
-      if (!has_kd) {
-        material.diffuse[0] = static_cast<real_t>(0.6);
-        material.diffuse[1] = static_cast<real_t>(0.6);
-        material.diffuse[2] = static_cast<real_t>(0.6);
-      }
-
-      continue;
-    }
-
-    // specular texture
-    if ((0 == strncmp(token, "map_Ks", 6)) && IS_SPACE(token[6])) {
-      token += 7;
-      ParseTextureNameAndOption(&(material.specular_texname),
-                                &(material.specular_texopt), token);
-      continue;
-    }
-
-    // specular highlight texture
-    if ((0 == strncmp(token, "map_Ns", 6)) && IS_SPACE(token[6])) {
-      token += 7;
-      ParseTextureNameAndOption(&(material.specular_highlight_texname),
-                                &(material.specular_highlight_texopt), token);
-      continue;
-    }
-
-    // bump texture
-    if (((0 == strncmp(token, "map_bump", 8)) ||
-         (0 == strncmp(token, "map_Bump", 8))) &&
-        IS_SPACE(token[8])) {
-      token += 9;
-      ParseTextureNameAndOption(&(material.bump_texname),
-                                &(material.bump_texopt), token);
-      continue;
-    }
-
-    // bump texture
-    if ((0 == strncmp(token, "bump", 4)) && IS_SPACE(token[4])) {
-      token += 5;
-      ParseTextureNameAndOption(&(material.bump_texname),
-                                &(material.bump_texopt), token);
-      continue;
-    }
-
-    // alpha texture
-    if ((0 == strncmp(token, "map_d", 5)) && IS_SPACE(token[5])) {
-      token += 6;
-      material.alpha_texname = token;
-      ParseTextureNameAndOption(&(material.alpha_texname),
-                                &(material.alpha_texopt), token);
-      continue;
-    }
-
-    // displacement texture
-    if (((0 == strncmp(token, "map_disp", 8)) ||
-         (0 == strncmp(token, "map_Disp", 8))) &&
-        IS_SPACE(token[8])) {
-      token += 9;
-      ParseTextureNameAndOption(&(material.displacement_texname),
-                                &(material.displacement_texopt), token);
-      continue;
-    }
-
-    // displacement texture
-    if ((0 == strncmp(token, "disp", 4)) && IS_SPACE(token[4])) {
-      token += 5;
-      ParseTextureNameAndOption(&(material.displacement_texname),
-                                &(material.displacement_texopt), token);
-      continue;
-    }
-
-    // reflection map
-    if ((0 == strncmp(token, "refl", 4)) && IS_SPACE(token[4])) {
-      token += 5;
-      ParseTextureNameAndOption(&(material.reflection_texname),
-                                &(material.reflection_texopt), token);
-      continue;
-    }
-
-    // PBR: roughness texture
-    if ((0 == strncmp(token, "map_Pr", 6)) && IS_SPACE(token[6])) {
-      token += 7;
-      ParseTextureNameAndOption(&(material.roughness_texname),
-                                &(material.roughness_texopt), token);
-      continue;
-    }
-
-    // PBR: metallic texture
-    if ((0 == strncmp(token, "map_Pm", 6)) && IS_SPACE(token[6])) {
-      token += 7;
-      ParseTextureNameAndOption(&(material.metallic_texname),
-                                &(material.metallic_texopt), token);
-      continue;
-    }
-
-    // PBR: sheen texture
-    if ((0 == strncmp(token, "map_Ps", 6)) && IS_SPACE(token[6])) {
-      token += 7;
-      ParseTextureNameAndOption(&(material.sheen_texname),
-                                &(material.sheen_texopt), token);
-      continue;
-    }
-
-    // PBR: emissive texture
-    if ((0 == strncmp(token, "map_Ke", 6)) && IS_SPACE(token[6])) {
-      token += 7;
-      ParseTextureNameAndOption(&(material.emissive_texname),
-                                &(material.emissive_texopt), token);
-      continue;
-    }
-
-    // PBR: normal map texture
-    if ((0 == strncmp(token, "norm", 4)) && IS_SPACE(token[4])) {
-      token += 5;
-      ParseTextureNameAndOption(&(material.normal_texname),
-                                &(material.normal_texopt), token);
-      continue;
-    }
-
-    // unknown parameter
-    const char *_space = strchr(token, ' ');
-    if (!_space) {
-      _space = strchr(token, '\t');
-    }
-    if (_space) {
-      std::ptrdiff_t len = _space - token;
-      std::string key(token, static_cast<size_t>(len));
-      std::string value = _space + 1;
-      material.unknown_parameter.insert(
-          std::pair<std::string, std::string>(key, value));
-    }
-  }
-  // flush last material.
-  material_map->insert(std::pair<std::string, int>(
-      material.name, static_cast<int>(materials->size())));
-  materials->push_back(material);
-
-  if (warning) {
-    (*warning) = warn_ss.str();
-  }
-}
-
-bool MaterialFileReader::operator()(const std::string &matId,
-                                    std::vector<material_t> *materials,
-                                    std::map<std::string, int> *matMap,
-                                    std::string *warn, std::string *err) {
-  if (!m_mtlBaseDir.empty()) {
-#ifdef _WIN32
-    char sep = ';';
-#else
-    char sep = ':';
-#endif
-
-    // https://stackoverflow.com/questions/5167625/splitting-a-c-stdstring-using-tokens-e-g
-    std::vector<std::string> paths;
-    std::istringstream f(m_mtlBaseDir);
-
-    std::string s;
-    while (getline(f, s, sep)) {
-      paths.push_back(s);
-    }
-
-    for (size_t i = 0; i < paths.size(); i++) {
-      std::string filepath = JoinPath(paths[i], matId);
-
-      std::ifstream matIStream(filepath.c_str());
-      if (matIStream) {
-        LoadMtl(matMap, materials, &matIStream, warn, err);
-
-        return true;
-      }
-    }
-
-    std::stringstream ss;
-    ss << "Material file [ " << matId
-       << " ] not found in a path : " << m_mtlBaseDir << "\n";
-    if (warn) {
-      (*warn) += ss.str();
-    }
-    return false;
-
-  } else {
-    std::string filepath = matId;
-    std::ifstream matIStream(filepath.c_str());
-    if (matIStream) {
-      LoadMtl(matMap, materials, &matIStream, warn, err);
-
-      return true;
-    }
-
-    std::stringstream ss;
-    ss << "Material file [ " << filepath
-       << " ] not found in a path : " << m_mtlBaseDir << "\n";
-    if (warn) {
-      (*warn) += ss.str();
-    }
-
-    return false;
-  }
-}
-
-bool MaterialStreamReader::operator()(const std::string &matId,
-                                      std::vector<material_t> *materials,
-                                      std::map<std::string, int> *matMap,
-                                      std::string *warn, std::string *err) {
-  (void)err;
-  (void)matId;
-  if (!m_inStream) {
-    std::stringstream ss;
-    ss << "Material stream in error state. \n";
-    if (warn) {
-      (*warn) += ss.str();
-    }
-    return false;
-  }
-
-  LoadMtl(matMap, materials, &m_inStream, warn, err);
-
-  return true;
-}
-
-bool LoadObj(attrib_t *attrib, std::vector<shape_t> *shapes,
-             std::vector<material_t> *materials, std::string *warn,
-             std::string *err, const char *filename, const char *mtl_basedir,
-             bool triangulate, bool default_vcols_fallback) {
-  attrib->vertices.clear();
-  attrib->normals.clear();
-  attrib->texcoords.clear();
-  attrib->colors.clear();
-  shapes->clear();
-
-  std::stringstream errss;
-
-  std::ifstream ifs(filename);
-  if (!ifs) {
-    errss << "Cannot open file [" << filename << "]\n";
-    if (err) {
-      (*err) = errss.str();
-    }
-    return false;
-  }
-
-  std::string baseDir = mtl_basedir ? mtl_basedir : "";
-  if (!baseDir.empty()) {
-#ifndef _WIN32
-    const char dirsep = '/';
-#else
-    const char dirsep = '\\';
-#endif
-    if (baseDir[baseDir.length() - 1] != dirsep) baseDir += dirsep;
-  }
-  MaterialFileReader matFileReader(baseDir);
-
-  return LoadObj(attrib, shapes, materials, warn, err, &ifs, &matFileReader,
-                 triangulate, default_vcols_fallback);
-}
-
-bool LoadObj(attrib_t *attrib, std::vector<shape_t> *shapes,
-             std::vector<material_t> *materials, std::string *warn,
-             std::string *err, std::istream *inStream,
-             MaterialReader *readMatFn /*= NULL*/, bool triangulate,
-             bool default_vcols_fallback) {
-  std::stringstream errss;
-
-  std::vector<real_t> v;
-  std::vector<real_t> vn;
-  std::vector<real_t> vt;
-  std::vector<real_t> vc;
-  std::vector<skin_weight_t> vw;
-  std::vector<tag_t> tags;
-  PrimGroup prim_group;
-  std::string name;
-
-  // material
-  std::set<std::string> material_filenames;
-  std::map<std::string, int> material_map;
-  int material = -1;
-
-  // smoothing group id
-  unsigned int current_smoothing_id =
-      0;  // Initial value. 0 means no smoothing.
-
-  int greatest_v_idx = -1;
-  int greatest_vn_idx = -1;
-  int greatest_vt_idx = -1;
-
-  shape_t shape;
-
-  bool found_all_colors = true;
-
-  size_t line_num = 0;
-  std::string linebuf;
-  while (inStream->peek() != -1) {
-    safeGetline(*inStream, linebuf);
-
-    line_num++;
-
-    // Trim newline '\r\n' or '\n'
-    if (linebuf.size() > 0) {
-      if (linebuf[linebuf.size() - 1] == '\n')
-        linebuf.erase(linebuf.size() - 1);
-    }
-    if (linebuf.size() > 0) {
-      if (linebuf[linebuf.size() - 1] == '\r')
-        linebuf.erase(linebuf.size() - 1);
-    }
-
-    // Skip if empty line.
-    if (linebuf.empty()) {
-      continue;
-    }
-
-    // Skip leading space.
-    const char *token = linebuf.c_str();
-    token += strspn(token, " \t");
-
-    assert(token);
-    if (token[0] == '\0') continue;  // empty line
-
-    if (token[0] == '#') continue;  // comment line
-
-    // vertex
-    if (token[0] == 'v' && IS_SPACE((token[1]))) {
-      token += 2;
-      real_t x, y, z;
-      real_t r, g, b;
-
-      found_all_colors &= parseVertexWithColor(&x, &y, &z, &r, &g, &b, &token);
-
-      v.push_back(x);
-      v.push_back(y);
-      v.push_back(z);
-
-      if (found_all_colors || default_vcols_fallback) {
-        vc.push_back(r);
-        vc.push_back(g);
-        vc.push_back(b);
-      }
-
-      continue;
-    }
-
-    // normal
-    if (token[0] == 'v' && token[1] == 'n' && IS_SPACE((token[2]))) {
-      token += 3;
-      real_t x, y, z;
-      parseReal3(&x, &y, &z, &token);
-      vn.push_back(x);
-      vn.push_back(y);
-      vn.push_back(z);
-      continue;
-    }
-
-    // texcoord
-    if (token[0] == 'v' && token[1] == 't' && IS_SPACE((token[2]))) {
-      token += 3;
-      real_t x, y;
-      parseReal2(&x, &y, &token);
-      vt.push_back(x);
-      vt.push_back(y);
-      continue;
-    }
-
-    // skin weight. tinyobj extension
-    if (token[0] == 'v' && token[1] == 'w' && IS_SPACE((token[2]))) {
-      token += 3;
-
-      // vw <vid> <joint_0> <weight_0> <joint_1> <weight_1> ...
-      // example:
-      // vw 0 0 0.25 1 0.25 2 0.5
-
-      // TODO(syoyo): Add syntax check
-      int vid = 0;
-      vid = parseInt(&token);
-
-      skin_weight_t sw;
-
-      sw.vertex_id = vid;
-
-      while (!IS_NEW_LINE(token[0])) {
-        real_t j, w;
-        // joint_id should not be negative, weight may be negative
-        // TODO(syoyo): # of elements check
-        parseReal2(&j, &w, &token, -1.0);
-
-        if (j < static_cast<real_t>(0)) {
-          if (err) {
-            std::stringstream ss;
-            ss << "Failed parse `vw' line. joint_id is negative. "
-                  "line "
-               << line_num << ".)\n";
-            (*err) += ss.str();
-          }
-          return false;
-        }
-
-        joint_and_weight_t jw;
-
-        jw.joint_id = int(j);
-        jw.weight = w;
-
-        sw.weightValues.push_back(jw);
-
-        size_t n = strspn(token, " \t\r");
-        token += n;
-      }
-
-      vw.push_back(sw);
-    }
-
-    warning_context context;
-    context.warn = warn;
-    context.line_number = line_num;
-
-    // line
-    if (token[0] == 'l' && IS_SPACE((token[1]))) {
-      token += 2;
-
-      __line_t line;
-
-      while (!IS_NEW_LINE(token[0])) {
-        vertex_index_t vi;
-        if (!parseTriple(&token, static_cast<int>(v.size() / 3),
-                         static_cast<int>(vn.size() / 3),
-                         static_cast<int>(vt.size() / 2), &vi, context)) {
-          if (err) {
-            (*err) += "Failed to parse `l' line (e.g. a zero value for vertex index. Line " +
-                toString(line_num) + ").\n";
-          }
-          return false;
-        }
-
-        line.vertex_indices.push_back(vi);
-
-        size_t n = strspn(token, " \t\r");
-        token += n;
-      }
-
-      prim_group.lineGroup.push_back(line);
-
-      continue;
-    }
-
-    // points
-    if (token[0] == 'p' && IS_SPACE((token[1]))) {
-      token += 2;
-
-      __points_t pts;
-
-      while (!IS_NEW_LINE(token[0])) {
-        vertex_index_t vi;
-        if (!parseTriple(&token, static_cast<int>(v.size() / 3),
-                         static_cast<int>(vn.size() / 3),
-                         static_cast<int>(vt.size() / 2), &vi, context)) {
-          if (err) {
-            (*err) += "Failed to parse `p' line (e.g. a zero value for vertex index. Line " +
-                toString(line_num) + ").\n";
-          }
-          return false;
-        }
-
-        pts.vertex_indices.push_back(vi);
-
-        size_t n = strspn(token, " \t\r");
-        token += n;
-      }
-
-      prim_group.pointsGroup.push_back(pts);
-
-      continue;
-    }
-
-    // face
-    if (token[0] == 'f' && IS_SPACE((token[1]))) {
-      token += 2;
-      token += strspn(token, " \t");
-
-      face_t face;
-
-      face.smoothing_group_id = current_smoothing_id;
-      face.vertex_indices.reserve(3);
-
-      while (!IS_NEW_LINE(token[0])) {
-        vertex_index_t vi;
-        if (!parseTriple(&token, static_cast<int>(v.size() / 3),
-                         static_cast<int>(vn.size() / 3),
-                         static_cast<int>(vt.size() / 2), &vi, context)) {
-          if (err) {
-            (*err) += "Failed to parse `f' line (e.g. a zero value for vertex index. Line " +
-                toString(line_num) + ").\n";
-          }
-          return false;
-        }
-
-        greatest_v_idx = greatest_v_idx > vi.v_idx ? greatest_v_idx : vi.v_idx;
-        greatest_vn_idx =
-            greatest_vn_idx > vi.vn_idx ? greatest_vn_idx : vi.vn_idx;
-        greatest_vt_idx =
-            greatest_vt_idx > vi.vt_idx ? greatest_vt_idx : vi.vt_idx;
-
-        face.vertex_indices.push_back(vi);
-        size_t n = strspn(token, " \t\r");
-        token += n;
-      }
-
-      // replace with emplace_back + std::move on C++11
-      prim_group.faceGroup.push_back(face);
-
-      continue;
-    }
-
-    // use mtl
-    if ((0 == strncmp(token, "usemtl", 6))) {
-      token += 6;
-      std::string namebuf = parseString(&token);
-
-      int newMaterialId = -1;
-      std::map<std::string, int>::const_iterator it =
-          material_map.find(namebuf);
-      if (it != material_map.end()) {
-        newMaterialId = it->second;
-      } else {
-        // { error!! material not found }
-        if (warn) {
-          (*warn) += "material [ '" + namebuf + "' ] not found in .mtl\n";
-        }
-      }
-
-      if (newMaterialId != material) {
-        // Create per-face material. Thus we don't add `shape` to `shapes` at
-        // this time.
-        // just clear `faceGroup` after `exportGroupsToShape()` call.
-        exportGroupsToShape(&shape, prim_group, tags, material, name,
-                            triangulate, v, warn);
-        prim_group.faceGroup.clear();
-        material = newMaterialId;
-      }
-
-      continue;
-    }
-
-    // load mtl
-    if ((0 == strncmp(token, "mtllib", 6)) && IS_SPACE((token[6]))) {
-      if (readMatFn) {
-        token += 7;
-
-        std::vector<std::string> filenames;
-        SplitString(std::string(token), ' ', '\\', filenames);
-
-        if (filenames.empty()) {
-          if (warn) {
-            std::stringstream ss;
-            ss << "Looks like empty filename for mtllib. Use default "
-                  "material (line "
-               << line_num << ".)\n";
-
-            (*warn) += ss.str();
-          }
-        } else {
-          bool found = false;
-          for (size_t s = 0; s < filenames.size(); s++) {
-            if (material_filenames.count(filenames[s]) > 0) {
-              found = true;
-              continue;
-            }
-
-            std::string warn_mtl;
-            std::string err_mtl;
-            bool ok = (*readMatFn)(filenames[s].c_str(), materials,
-                                   &material_map, &warn_mtl, &err_mtl);
-            if (warn && (!warn_mtl.empty())) {
-              (*warn) += warn_mtl;
-            }
-
-            if (err && (!err_mtl.empty())) {
-              (*err) += err_mtl;
-            }
-
-            if (ok) {
-              found = true;
-              material_filenames.insert(filenames[s]);
-              break;
-            }
-          }
-
-          if (!found) {
-            if (warn) {
-              (*warn) +=
-                  "Failed to load material file(s). Use default "
-                  "material.\n";
-            }
-          }
-        }
-      }
-
-      continue;
-    }
-
-    // group name
-    if (token[0] == 'g' && IS_SPACE((token[1]))) {
-      // flush previous face group.
-      bool ret = exportGroupsToShape(&shape, prim_group, tags, material, name,
-                                     triangulate, v, warn);
-      (void)ret;  // return value not used.
-
-      if (shape.mesh.indices.size() > 0) {
-        shapes->push_back(shape);
-      }
-
-      shape = shape_t();
-
-      // material = -1;
-      prim_group.clear();
-
-      std::vector<std::string> names;
-
-      while (!IS_NEW_LINE(token[0])) {
-        std::string str = parseString(&token);
-        names.push_back(str);
-        token += strspn(token, " \t\r");  // skip tag
-      }
-
-      // names[0] must be 'g'
-
-      if (names.size() < 2) {
-        // 'g' with empty names
-        if (warn) {
-          std::stringstream ss;
-          ss << "Empty group name. line: " << line_num << "\n";
-          (*warn) += ss.str();
-          name = "";
-        }
-      } else {
-        std::stringstream ss;
-        ss << names[1];
-
-        // tinyobjloader does not support multiple groups for a primitive.
-        // Currently we concatinate multiple group names with a space to get
-        // single group name.
-
-        for (size_t i = 2; i < names.size(); i++) {
-          ss << " " << names[i];
-        }
-
-        name = ss.str();
-      }
-
-      continue;
-    }
-
-    // object name
-    if (token[0] == 'o' && IS_SPACE((token[1]))) {
-      // flush previous face group.
-      bool ret = exportGroupsToShape(&shape, prim_group, tags, material, name,
-                                     triangulate, v, warn);
-      (void)ret;  // return value not used.
-
-      if (shape.mesh.indices.size() > 0 || shape.lines.indices.size() > 0 ||
-          shape.points.indices.size() > 0) {
-        shapes->push_back(shape);
-      }
-
-      // material = -1;
-      prim_group.clear();
-      shape = shape_t();
-
-      // @todo { multiple object name? }
-      token += 2;
-      std::stringstream ss;
-      ss << token;
-      name = ss.str();
-
-      continue;
-    }
-
-    if (token[0] == 't' && IS_SPACE(token[1])) {
-      const int max_tag_nums = 8192;  // FIXME(syoyo): Parameterize.
-      tag_t tag;
-
-      token += 2;
-
-      tag.name = parseString(&token);
-
-      tag_sizes ts = parseTagTriple(&token);
-
-      if (ts.num_ints < 0) {
-        ts.num_ints = 0;
-      }
-      if (ts.num_ints > max_tag_nums) {
-        ts.num_ints = max_tag_nums;
-      }
-
-      if (ts.num_reals < 0) {
-        ts.num_reals = 0;
-      }
-      if (ts.num_reals > max_tag_nums) {
-        ts.num_reals = max_tag_nums;
-      }
-
-      if (ts.num_strings < 0) {
-        ts.num_strings = 0;
-      }
-      if (ts.num_strings > max_tag_nums) {
-        ts.num_strings = max_tag_nums;
-      }
-
-      tag.intValues.resize(static_cast<size_t>(ts.num_ints));
-
-      for (size_t i = 0; i < static_cast<size_t>(ts.num_ints); ++i) {
-        tag.intValues[i] = parseInt(&token);
-      }
-
-      tag.floatValues.resize(static_cast<size_t>(ts.num_reals));
-      for (size_t i = 0; i < static_cast<size_t>(ts.num_reals); ++i) {
-        tag.floatValues[i] = parseReal(&token);
-      }
-
-      tag.stringValues.resize(static_cast<size_t>(ts.num_strings));
-      for (size_t i = 0; i < static_cast<size_t>(ts.num_strings); ++i) {
-        tag.stringValues[i] = parseString(&token);
-      }
-
-      tags.push_back(tag);
-
-      continue;
-    }
-
-    if (token[0] == 's' && IS_SPACE(token[1])) {
-      // smoothing group id
-      token += 2;
-
-      // skip space.
-      token += strspn(token, " \t");  // skip space
-
-      if (token[0] == '\0') {
-        continue;
-      }
-
-      if (token[0] == '\r' || token[1] == '\n') {
-        continue;
-      }
-
-      if (strlen(token) >= 3 && token[0] == 'o' && token[1] == 'f' &&
-          token[2] == 'f') {
-        current_smoothing_id = 0;
-      } else {
-        // assume number
-        int smGroupId = parseInt(&token);
-        if (smGroupId < 0) {
-          // parse error. force set to 0.
-          // FIXME(syoyo): Report warning.
-          current_smoothing_id = 0;
-        } else {
-          current_smoothing_id = static_cast<unsigned int>(smGroupId);
-        }
-      }
-
-      continue;
-    }  // smoothing group id
-
-    // Ignore unknown command.
-  }
-
-  // not all vertices have colors, no default colors desired? -> clear colors
-  if (!found_all_colors && !default_vcols_fallback) {
-    vc.clear();
-  }
-
-  if (greatest_v_idx >= static_cast<int>(v.size() / 3)) {
-    if (warn) {
-      std::stringstream ss;
-      ss << "Vertex indices out of bounds (line " << line_num << ".)\n\n";
-      (*warn) += ss.str();
-    }
-  }
-  if (greatest_vn_idx >= static_cast<int>(vn.size() / 3)) {
-    if (warn) {
-      std::stringstream ss;
-      ss << "Vertex normal indices out of bounds (line " << line_num << ".)\n\n";
-      (*warn) += ss.str();
-    }
-  }
-  if (greatest_vt_idx >= static_cast<int>(vt.size() / 2)) {
-    if (warn) {
-      std::stringstream ss;
-      ss << "Vertex texcoord indices out of bounds (line " << line_num << ".)\n\n";
-      (*warn) += ss.str();
-    }
-  }
-
-  bool ret = exportGroupsToShape(&shape, prim_group, tags, material, name,
-                                 triangulate, v, warn);
-  // exportGroupsToShape return false when `usemtl` is called in the last
-  // line.
-  // we also add `shape` to `shapes` when `shape.mesh` has already some
-  // faces(indices)
-  if (ret || shape.mesh.indices
-                 .size()) {  // FIXME(syoyo): Support other prims(e.g. lines)
-    shapes->push_back(shape);
-  }
-  prim_group.clear();  // for safety
-
-  if (err) {
-    (*err) += errss.str();
-  }
-
-  attrib->vertices.swap(v);
-  attrib->vertex_weights.swap(v);
-  attrib->normals.swap(vn);
-  attrib->texcoords.swap(vt);
-  attrib->texcoord_ws.swap(vt);
-  attrib->colors.swap(vc);
-  attrib->skin_weights.swap(vw);
-
-  return true;
-}
-
-bool LoadObjWithCallback(std::istream &inStream, const callback_t &callback,
-                         void *user_data /*= NULL*/,
-                         MaterialReader *readMatFn /*= NULL*/,
-                         std::string *warn, /* = NULL*/
-                         std::string *err /*= NULL*/) {
-  std::stringstream errss;
-
-  // material
-  std::set<std::string> material_filenames;
-  std::map<std::string, int> material_map;
-  int material_id = -1;  // -1 = invalid
-
-  std::vector<index_t> indices;
-  std::vector<material_t> materials;
-  std::vector<std::string> names;
-  names.reserve(2);
-  std::vector<const char *> names_out;
-
-  std::string linebuf;
-  while (inStream.peek() != -1) {
-    safeGetline(inStream, linebuf);
-
-    // Trim newline '\r\n' or '\n'
-    if (linebuf.size() > 0) {
-      if (linebuf[linebuf.size() - 1] == '\n')
-        linebuf.erase(linebuf.size() - 1);
-    }
-    if (linebuf.size() > 0) {
-      if (linebuf[linebuf.size() - 1] == '\r')
-        linebuf.erase(linebuf.size() - 1);
-    }
-
-    // Skip if empty line.
-    if (linebuf.empty()) {
-      continue;
-    }
-
-    // Skip leading space.
-    const char *token = linebuf.c_str();
-    token += strspn(token, " \t");
-
-    assert(token);
-    if (token[0] == '\0') continue;  // empty line
-
-    if (token[0] == '#') continue;  // comment line
-
-    // vertex
-    if (token[0] == 'v' && IS_SPACE((token[1]))) {
-      token += 2;
-      real_t x, y, z;
-      real_t r, g, b;
-
-      bool found_color = parseVertexWithColor(&x, &y, &z, &r, &g, &b, &token);
-      if (callback.vertex_cb) {
-        callback.vertex_cb(user_data, x, y, z, r);  // r=w is optional
-      }
-      if (callback.vertex_color_cb) {
-        callback.vertex_color_cb(user_data, x, y, z, r, g, b, found_color);
-      }
-      continue;
-    }
-
-    // normal
-    if (token[0] == 'v' && token[1] == 'n' && IS_SPACE((token[2]))) {
-      token += 3;
-      real_t x, y, z;
-      parseReal3(&x, &y, &z, &token);
-      if (callback.normal_cb) {
-        callback.normal_cb(user_data, x, y, z);
-      }
-      continue;
-    }
-
-    // texcoord
-    if (token[0] == 'v' && token[1] == 't' && IS_SPACE((token[2]))) {
-      token += 3;
-      real_t x, y, z;  // y and z are optional. default = 0.0
-      parseReal3(&x, &y, &z, &token);
-      if (callback.texcoord_cb) {
-        callback.texcoord_cb(user_data, x, y, z);
-      }
-      continue;
-    }
-
-    // face
-    if (token[0] == 'f' && IS_SPACE((token[1]))) {
-      token += 2;
-      token += strspn(token, " \t");
-
-      indices.clear();
-      while (!IS_NEW_LINE(token[0])) {
-        vertex_index_t vi = parseRawTriple(&token);
-
-        index_t idx;
-        idx.vertex_index = vi.v_idx;
-        idx.normal_index = vi.vn_idx;
-        idx.texcoord_index = vi.vt_idx;
-
-        indices.push_back(idx);
-        size_t n = strspn(token, " \t\r");
-        token += n;
-      }
-
-      if (callback.index_cb && indices.size() > 0) {
-        callback.index_cb(user_data, &indices.at(0),
-                          static_cast<int>(indices.size()));
-      }
-
-      continue;
-    }
-
-    // use mtl
-    if ((0 == strncmp(token, "usemtl", 6)) && IS_SPACE((token[6]))) {
-      token += 7;
-      std::stringstream ss;
-      ss << token;
-      std::string namebuf = ss.str();
-
-      int newMaterialId = -1;
-      std::map<std::string, int>::const_iterator it =
-          material_map.find(namebuf);
-      if (it != material_map.end()) {
-        newMaterialId = it->second;
-      } else {
-        // { warn!! material not found }
-        if (warn && (!callback.usemtl_cb)) {
-          (*warn) += "material [ " + namebuf + " ] not found in .mtl\n";
-        }
-      }
-
-      if (newMaterialId != material_id) {
-        material_id = newMaterialId;
-      }
-
-      if (callback.usemtl_cb) {
-        callback.usemtl_cb(user_data, namebuf.c_str(), material_id);
-      }
-
-      continue;
-    }
-
-    // load mtl
-    if ((0 == strncmp(token, "mtllib", 6)) && IS_SPACE((token[6]))) {
-      if (readMatFn) {
-        token += 7;
-
-        std::vector<std::string> filenames;
-        SplitString(std::string(token), ' ', '\\', filenames);
-
-        if (filenames.empty()) {
-          if (warn) {
-            (*warn) +=
-                "Looks like empty filename for mtllib. Use default "
-                "material. \n";
-          }
-        } else {
-          bool found = false;
-          for (size_t s = 0; s < filenames.size(); s++) {
-            if (material_filenames.count(filenames[s]) > 0) {
-              found = true;
-              continue;
-            }
-
-            std::string warn_mtl;
-            std::string err_mtl;
-            bool ok = (*readMatFn)(filenames[s].c_str(), &materials,
-                                   &material_map, &warn_mtl, &err_mtl);
-
-            if (warn && (!warn_mtl.empty())) {
-              (*warn) += warn_mtl;  // This should be warn message.
-            }
-
-            if (err && (!err_mtl.empty())) {
-              (*err) += err_mtl;
-            }
-
-            if (ok) {
-              found = true;
-              material_filenames.insert(filenames[s]);
-              break;
-            }
-          }
-
-          if (!found) {
-            if (warn) {
-              (*warn) +=
-                  "Failed to load material file(s). Use default "
-                  "material.\n";
-            }
-          } else {
-            if (callback.mtllib_cb) {
-              callback.mtllib_cb(user_data, &materials.at(0),
-                                 static_cast<int>(materials.size()));
-            }
-          }
-        }
-      }
-
-      continue;
-    }
-
-    // group name
-    if (token[0] == 'g' && IS_SPACE((token[1]))) {
-      names.clear();
-
-      while (!IS_NEW_LINE(token[0])) {
-        std::string str = parseString(&token);
-        names.push_back(str);
-        token += strspn(token, " \t\r");  // skip tag
-      }
-
-      assert(names.size() > 0);
-
-      if (callback.group_cb) {
-        if (names.size() > 1) {
-          // create const char* array.
-          names_out.resize(names.size() - 1);
-          for (size_t j = 0; j < names_out.size(); j++) {
-            names_out[j] = names[j + 1].c_str();
-          }
-          callback.group_cb(user_data, &names_out.at(0),
-                            static_cast<int>(names_out.size()));
-
-        } else {
-          callback.group_cb(user_data, NULL, 0);
-        }
-      }
-
-      continue;
-    }
-
-    // object name
-    if (token[0] == 'o' && IS_SPACE((token[1]))) {
-      // @todo { multiple object name? }
-      token += 2;
-
-      std::stringstream ss;
-      ss << token;
-      std::string object_name = ss.str();
-
-      if (callback.object_cb) {
-        callback.object_cb(user_data, object_name.c_str());
-      }
-
-      continue;
-    }
-
-#if 0  // @todo
-    if (token[0] == 't' && IS_SPACE(token[1])) {
-      tag_t tag;
-
-      token += 2;
-      std::stringstream ss;
-      ss << token;
-      tag.name = ss.str();
-
-      token += tag.name.size() + 1;
-
-      tag_sizes ts = parseTagTriple(&token);
-
-      tag.intValues.resize(static_cast<size_t>(ts.num_ints));
-
-      for (size_t i = 0; i < static_cast<size_t>(ts.num_ints); ++i) {
-        tag.intValues[i] = atoi(token);
-        token += strcspn(token, "/ \t\r") + 1;
-      }
-
-      tag.floatValues.resize(static_cast<size_t>(ts.num_reals));
-      for (size_t i = 0; i < static_cast<size_t>(ts.num_reals); ++i) {
-        tag.floatValues[i] = parseReal(&token);
-        token += strcspn(token, "/ \t\r") + 1;
-      }
-
-      tag.stringValues.resize(static_cast<size_t>(ts.num_strings));
-      for (size_t i = 0; i < static_cast<size_t>(ts.num_strings); ++i) {
-        std::stringstream ss;
-        ss << token;
-        tag.stringValues[i] = ss.str();
-        token += tag.stringValues[i].size() + 1;
-      }
-
-      tags.push_back(tag);
-    }
-#endif
-
-    // Ignore unknown command.
-  }
-
-  if (err) {
-    (*err) += errss.str();
-  }
-
-  return true;
-}
-
-bool ObjReader::ParseFromFile(const std::string &filename,
-                              const ObjReaderConfig &config) {
-  std::string mtl_search_path;
-
-  if (config.mtl_search_path.empty()) {
-    //
-    // split at last '/'(for unixish system) or '\\'(for windows) to get
-    // the base directory of .obj file
-    //
-    size_t pos = filename.find_last_of("/\\");
-    if (pos != std::string::npos) {
-      mtl_search_path = filename.substr(0, pos);
-    }
-  } else {
-    mtl_search_path = config.mtl_search_path;
-  }
-
-  valid_ = LoadObj(&attrib_, &shapes_, &materials_, &warning_, &error_,
-                   filename.c_str(), mtl_search_path.c_str(),
-                   config.triangulate, config.vertex_color);
-
-  return valid_;
-}
-
-bool ObjReader::ParseFromString(const std::string &obj_text,
-                                const std::string &mtl_text,
-                                const ObjReaderConfig &config) {
-  std::stringbuf obj_buf(obj_text);
-  std::stringbuf mtl_buf(mtl_text);
-
-  std::istream obj_ifs(&obj_buf);
-  std::istream mtl_ifs(&mtl_buf);
-
-  MaterialStreamReader mtl_ss(mtl_ifs);
-
-  valid_ = LoadObj(&attrib_, &shapes_, &materials_, &warning_, &error_,
-                   &obj_ifs, &mtl_ss, config.triangulate, config.vertex_color);
-
-  return valid_;
-}
-
-#ifdef __clang__
-#pragma clang diagnostic pop
-#endif
-}  // namespace tinyobj
-
-#endif