diff --git a/.gitignore b/.gitignore index 0067a97..87385e5 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,4 @@ -/build -/debug .cache .vscode +/zig-cache +/zig-out diff --git a/build.zig b/build.zig new file mode 100644 index 0000000..ba9db36 --- /dev/null +++ b/build.zig @@ -0,0 +1,55 @@ +const std = @import("std"); +const zmath = @import("lib/zmath/build.zig"); + +pub fn build(b: *std.Build) void { + // Standard target options allows the person running `zig build` to choose + // what target to build for. Here we do not override the defaults, which + // means any target is allowed, and the default is native. Other options + // for restricting supported target set are available. + + const target = b.standardTargetOptions(.{}); + + // Standard release options allow the person running `zig build` to select + // between Debug, ReleaseSafe, ReleaseFast, and ReleaseSmall. + const mode = b.standardOptimizeOption(.{}); + + const exe = b.addExecutable(.{ + .name = "somaesque-native-zig", + .root_source_file = .{ .path = "src/main.zig" }, + .target = target, + .optimize = mode, + }); + exe.addIncludePath("/usr/local/include"); + + exe.linkLibC(); + exe.linkSystemLibrary("glfw3"); + exe.linkSystemLibrary("glm"); + exe.linkSystemLibrary("GL"); + exe.addIncludePath("lib/c"); + + exe.addCSourceFile("lib/c/glad/glad.c", &[_][]const u8{"-std=c11"}); + + exe.install(); + + // zmath + const zmath_pkg = zmath.package(b, target, mode, .{ + .options = .{ .enable_cross_platform_determinism = true }, + }); + zmath_pkg.link(exe); + + const run_cmd = exe.run(); + run_cmd.step.dependOn(b.getInstallStep()); + if (b.args) |args| { + run_cmd.addArgs(args); + } + + const run_step = b.step("run", "Run the app"); + run_step.dependOn(&run_cmd.step); + + //const exe_tests = b.addTest("src/main.zig"); + //exe_tests.setTarget(target); + //exe_tests.setBuildMode(mode); + + //const test_step = b.step("test", "Run unit tests"); + //test_step.dependOn(&exe_tests.step); +} diff --git 
a/vendor/KHR/khrplatform.h b/lib/c/KHR/khrplatform.h similarity index 100% rename from vendor/KHR/khrplatform.h rename to lib/c/KHR/khrplatform.h diff --git a/vendor/glad/glad.c b/lib/c/glad/glad.c similarity index 100% rename from vendor/glad/glad.c rename to lib/c/glad/glad.c diff --git a/vendor/glad/glad.h b/lib/c/glad/glad.h similarity index 100% rename from vendor/glad/glad.h rename to lib/c/glad/glad.h diff --git a/vendor/loaders/stb_image.h b/lib/c/loaders/stb_image.h similarity index 100% rename from vendor/loaders/stb_image.h rename to lib/c/loaders/stb_image.h diff --git a/lib/c/loaders/tinyobj.h b/lib/c/loaders/tinyobj.h new file mode 100644 index 0000000..cbfa301 --- /dev/null +++ b/lib/c/loaders/tinyobj.h @@ -0,0 +1,1739 @@ +/* + The MIT License (MIT) + + Copyright (c) 2016 - 2019 Syoyo Fujita and many contributors. + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. + */ +#ifndef TINOBJ_LOADER_C_H_ +#define TINOBJ_LOADER_C_H_ + +/* @todo { Remove stddef dependency. 
size_t? } */ +#include + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct { + char *name; + + float ambient[3]; + float diffuse[3]; + float specular[3]; + float transmittance[3]; + float emission[3]; + float shininess; + float ior; /* index of refraction */ + float dissolve; /* 1 == opaque; 0 == fully transparent */ + /* illumination model (see http://www.fileformat.info/format/material/) */ + int illum; + + int pad0; + + char *ambient_texname; /* map_Ka */ + char *diffuse_texname; /* map_Kd */ + char *specular_texname; /* map_Ks */ + char *specular_highlight_texname; /* map_Ns */ + char *bump_texname; /* map_bump, bump */ + char *displacement_texname; /* disp */ + char *alpha_texname; /* map_d */ +} tinyobj_material_t; + +typedef struct { + char *name; /* group name or object name. */ + unsigned int face_offset; + unsigned int length; +} tinyobj_shape_t; + +typedef struct { int v_idx, vt_idx, vn_idx; } tinyobj_vertex_index_t; + +typedef struct { + unsigned int num_vertices; + unsigned int num_normals; + unsigned int num_texcoords; + unsigned int num_faces; + unsigned int num_face_num_verts; + + int pad0; + + float *vertices; + float *normals; + float *texcoords; + tinyobj_vertex_index_t *faces; + int *face_num_verts; + int *material_ids; +} tinyobj_attrib_t; + + +#define TINYOBJ_FLAG_TRIANGULATE (1 << 0) + +#define TINYOBJ_INVALID_INDEX (0x80000000) + +#define TINYOBJ_SUCCESS (0) +#define TINYOBJ_ERROR_EMPTY (-1) +#define TINYOBJ_ERROR_INVALID_PARAMETER (-2) +#define TINYOBJ_ERROR_FILE_OPERATION (-3) + +/* Provide a callback that can read text file without any parsing or modification. + * The obj and mtl parser is going to read all the necessary data: + * tinyobj_parse_obj + * tinyobj_parse_mtl_file + * + * @param[in] ctx User provided context. + * @param[in] filename Filename to be loaded. + * @param[in] is_mtl 1 when the callback is invoked for loading .mtl. 0 for .obj + * @param[in] obj_filename .obj filename. 
Useful when you load .mtl from same location of .obj. When the callback is called to load .obj, `filename` and `obj_filename` are same. + * @param[out] buf Content of loaded file + * @param[out] len Size of content(file) + */ +typedef void (*file_reader_callback)(void *ctx, const char *filename, int is_mtl, const char *obj_filename, char **buf, size_t *len); + +/* Parse wavefront .obj + * @param[out] attrib Attibutes + * @param[out] shapes Array of parsed shapes + * @param[out] num_shapes Array length of `shapes` + * @param[out] materials Array of parsed materials + * @param[out] num_materials Array length of `materials` + * @param[in] file_name File name of .obj + * @param[in] file_reader File reader callback function(to read .obj and .mtl). + * @param[in] ctx Context pointer passed to the file_reader_callback. + * @param[in] flags combination of TINYOBJ_FLAG_*** + * + * Returns TINYOBJ_SUCCESS if things goes well. + * Returns TINYOBJ_ERR_*** when there is an error. + */ +extern int tinyobj_parse_obj(tinyobj_attrib_t *attrib, tinyobj_shape_t **shapes, + size_t *num_shapes, tinyobj_material_t **materials, + size_t *num_materials, const char *file_name, file_reader_callback file_reader, + void *ctx, unsigned int flags); + +/* Parse wavefront .mtl + * + * @param[out] materials_out + * @param[out] num_materials_out + * @param[in] filename .mtl filename + * @param[in] filename of .obj filename. could be NULL if you just want to parse .mtl file. + * @param[in] file_reader File reader callback + * @param[in[ ctx Context pointer passed to the file_reader callack. + + * Returns TINYOBJ_SUCCESS if things goes well. + * Returns TINYOBJ_ERR_*** when there is an error. 
+ */ +extern int tinyobj_parse_mtl_file(tinyobj_material_t **materials_out, + size_t *num_materials_out, + const char *filename, const char *obj_filename, file_reader_callback file_reader, + void *ctx); + +extern void tinyobj_attrib_init(tinyobj_attrib_t *attrib); +extern void tinyobj_attrib_free(tinyobj_attrib_t *attrib); +extern void tinyobj_shapes_free(tinyobj_shape_t *shapes, size_t num_shapes); +extern void tinyobj_materials_free(tinyobj_material_t *materials, + size_t num_materials); + +#ifdef __cplusplus +} +#endif + +#ifdef TINYOBJ_LOADER_C_IMPLEMENTATION +#include +#include +#include +#include + +#if defined(TINYOBJ_MALLOC) && defined(TINYOBJ_CALLOC) && defined(TINYOBJ_FREE) && (defined(TINYOBJ_REALLOC) || defined(TINYOBJ_REALLOC_SIZED)) +/* ok */ +#elif !defined(TINYOBJ_MALLOC) && !defined(TINYOBJ_CALLOC) && !defined(TINYOBJ_FREE) && !defined(TINYOBJ_REALLOC) && !defined(TINYOBJ_REALLOC_SIZED) +/* ok */ +#else +#error "Must define all or none of TINYOBJ_MALLOC, TINYOBJ_CALLOC, TINYOBJ_FREE, and TINYOBJ_REALLOC (or TINYOBJ_REALLOC_SIZED)." 
+#endif + +#ifndef TINYOBJ_MALLOC +#include +#define TINYOBJ_MALLOC malloc +#define TINYOBJ_REALLOC realloc +#define TINYOBJ_CALLOC calloc +#define TINYOBJ_FREE free +#endif + +#ifndef TINYOBJ_REALLOC_SIZED +#define TINYOBJ_REALLOC_SIZED(p,oldsz,newsz) TINYOBJ_REALLOC(p,newsz) +#endif + +#define TINYOBJ_MAX_FACES_PER_F_LINE (16) +#define TINYOBJ_MAX_FILEPATH (8192) + +#define IS_SPACE(x) (((x) == ' ') || ((x) == '\t')) +#define IS_DIGIT(x) ((unsigned int)((x) - '0') < (unsigned int)(10)) +#define IS_NEW_LINE(x) (((x) == '\r') || ((x) == '\n') || ((x) == '\0')) + +static void skip_space(const char **token) { + while ((*token)[0] == ' ' || (*token)[0] == '\t') { + (*token)++; + } +} + +static void skip_space_and_cr(const char **token) { + while ((*token)[0] == ' ' || (*token)[0] == '\t' || (*token)[0] == '\r') { + (*token)++; + } +} + +static int until_space(const char *token) { + const char *p = token; + while (p[0] != '\0' && p[0] != ' ' && p[0] != '\t' && p[0] != '\r') { + p++; + } + + return (int)(p - token); +} + +static size_t length_until_newline(const char *token, size_t n) { + size_t len = 0; + + /* Assume token[n-1] = '\0' */ + for (len = 0; len < n - 1; len++) { + if (token[len] == '\n') { + break; + } + if ((token[len] == '\r') && ((len < (n - 2)) && (token[len + 1] != '\n'))) { + break; + } + } + + return len; +} + +static size_t length_until_line_feed(const char *token, size_t n) { + size_t len = 0; + + /* Assume token[n-1] = '\0' */ + for (len = 0; len < n; len++) { + if ((token[len] == '\n') || (token[len] == '\r')) { + break; + } + } + + return len; +} + +/* http://stackoverflow.com/questions/5710091/how-does-atoi-function-in-c-work +*/ +static int my_atoi(const char *c) { + int value = 0; + int sign = 1; + if (*c == '+' || *c == '-') { + if (*c == '-') sign = -1; + c++; + } + while (((*c) >= '0') && ((*c) <= '9')) { /* isdigit(*c) */ + value *= 10; + value += (int)(*c - '0'); + c++; + } + return value * sign; +} + +/* Make index zero-base, and also 
support relative index. */ +static int fixIndex(int idx, size_t n) { + if (idx > 0) return idx - 1; + if (idx == 0) return 0; + return (int)n + idx; /* negative value = relative */ +} + +/* Parse raw triples: i, i/j/k, i//k, i/j */ +static tinyobj_vertex_index_t parseRawTriple(const char **token) { + tinyobj_vertex_index_t vi; + /* 0x80000000 = -2147483648 = invalid */ + vi.v_idx = (int)(0x80000000); + vi.vn_idx = (int)(0x80000000); + vi.vt_idx = (int)(0x80000000); + + vi.v_idx = my_atoi((*token)); + while ((*token)[0] != '\0' && (*token)[0] != '/' && (*token)[0] != ' ' && + (*token)[0] != '\t' && (*token)[0] != '\r') { + (*token)++; + } + if ((*token)[0] != '/') { + return vi; + } + (*token)++; + + /* i//k */ + if ((*token)[0] == '/') { + (*token)++; + vi.vn_idx = my_atoi((*token)); + while ((*token)[0] != '\0' && (*token)[0] != '/' && (*token)[0] != ' ' && + (*token)[0] != '\t' && (*token)[0] != '\r') { + (*token)++; + } + return vi; + } + + /* i/j/k or i/j */ + vi.vt_idx = my_atoi((*token)); + while ((*token)[0] != '\0' && (*token)[0] != '/' && (*token)[0] != ' ' && + (*token)[0] != '\t' && (*token)[0] != '\r') { + (*token)++; + } + if ((*token)[0] != '/') { + return vi; + } + + /* i/j/k */ + (*token)++; /* skip '/' */ + vi.vn_idx = my_atoi((*token)); + while ((*token)[0] != '\0' && (*token)[0] != '/' && (*token)[0] != ' ' && + (*token)[0] != '\t' && (*token)[0] != '\r') { + (*token)++; + } + return vi; +} + +static int parseInt(const char **token) { + int i = 0; + skip_space(token); + i = my_atoi((*token)); + (*token) += until_space((*token)); + return i; +} + +/* + * Tries to parse a floating point number located at s. + * + * s_end should be a location in the string where reading should absolutely + * stop. For example at the end of the string, to prevent buffer overflows. + * + * Parses the following EBNF grammar: + * sign = "+" | "-" ; + * END = ? anything not in digit ? 
+ * digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" ; + * integer = [sign] , digit , {digit} ; + * decimal = integer , ["." , integer] ; + * float = ( decimal , END ) | ( decimal , ("E" | "e") , integer , END ) ; + * + * Valid strings are for example: + * -0 +3.1417e+2 -0.0E-3 1.0324 -1.41 11e2 + * + * If the parsing is a success, result is set to the parsed value and true + * is returned. + * + * The function is greedy and will parse until any of the following happens: + * - a non-conforming character is encountered. + * - s_end is reached. + * + * The following situations triggers a failure: + * - s >= s_end. + * - parse failure. + */ +static int tryParseDouble(const char *s, const char *s_end, double *result) { + double mantissa = 0.0; + /* This exponent is base 2 rather than 10. + * However the exponent we parse is supposed to be one of ten, + * thus we must take care to convert the exponent/and or the + * mantissa to a * 2^E, where a is the mantissa and E is the + * exponent. + * To get the final double we will use ldexp, it requires the + * exponent to be in base 2. + */ + int exponent = 0; + + /* NOTE: THESE MUST BE DECLARED HERE SINCE WE ARE NOT ALLOWED + * TO JUMP OVER DEFINITIONS. + */ + char sign = '+'; + char exp_sign = '+'; + char const *curr = s; + + /* How many characters were read in a loop. */ + int read = 0; + /* Tells whether a loop terminated due to reaching s_end. */ + int end_not_reached = 0; + + /* + BEGIN PARSING. + */ + + if (s >= s_end) { + return 0; /* fail */ + } + + /* Find out what sign we've got. */ + if (*curr == '+' || *curr == '-') { + sign = *curr; + curr++; + } else if (IS_DIGIT(*curr)) { /* Pass through. */ + } else { + goto fail; + } + + /* Read the integer part. 
*/ + end_not_reached = (curr != s_end); + while (end_not_reached && IS_DIGIT(*curr)) { + mantissa *= 10; + mantissa += (int)(*curr - 0x30); + curr++; + read++; + end_not_reached = (curr != s_end); + } + + /* We must make sure we actually got something. */ + if (read == 0) goto fail; + /* We allow numbers of form "#", "###" etc. */ + if (!end_not_reached) goto assemble; + + /* Read the decimal part. */ + if (*curr == '.') { + curr++; + read = 1; + end_not_reached = (curr != s_end); + while (end_not_reached && IS_DIGIT(*curr)) { + /* pow(10.0, -read) */ + double frac_value = 1.0; + int f; + for (f = 0; f < read; f++) { + frac_value *= 0.1; + } + mantissa += (int)(*curr - 0x30) * frac_value; + read++; + curr++; + end_not_reached = (curr != s_end); + } + } else if (*curr == 'e' || *curr == 'E') { + } else { + goto assemble; + } + + if (!end_not_reached) goto assemble; + + /* Read the exponent part. */ + if (*curr == 'e' || *curr == 'E') { + curr++; + /* Figure out if a sign is present and if it is. */ + end_not_reached = (curr != s_end); + if (end_not_reached && (*curr == '+' || *curr == '-')) { + exp_sign = *curr; + curr++; + } else if (IS_DIGIT(*curr)) { /* Pass through. */ + } else { + /* Empty E is not allowed. */ + goto fail; + } + + read = 0; + end_not_reached = (curr != s_end); + while (end_not_reached && IS_DIGIT(*curr)) { + exponent *= 10; + exponent += (int)(*curr - 0x30); + curr++; + read++; + end_not_reached = (curr != s_end); + } + if (read == 0) goto fail; + } + +assemble : + + { + double a = 1.0; /* = pow(5.0, exponent); */ + double b = 1.0; /* = 2.0^exponent */ + int i; + for (i = 0; i < exponent; i++) { + a = a * 5.0; + } + + for (i = 0; i < exponent; i++) { + b = b * 2.0; + } + + if (exp_sign == '-') { + a = 1.0 / a; + b = 1.0 / b; + } + + *result = + /* (sign == '+' ? 1 : -1) * ldexp(mantissa * pow(5.0, exponent), + exponent); */ + (sign == '+' ? 
1 : -1) * (mantissa * a * b); + } + + return 1; +fail: + return 0; +} + +static float parseFloat(const char **token) { + const char *end; + double val = 0.0; + float f = 0.0f; + skip_space(token); + end = (*token) + until_space((*token)); + val = 0.0; + tryParseDouble((*token), end, &val); + f = (float)(val); + (*token) = end; + return f; +} + +static void parseFloat2(float *x, float *y, const char **token) { + (*x) = parseFloat(token); + (*y) = parseFloat(token); +} + +static void parseFloat3(float *x, float *y, float *z, const char **token) { + (*x) = parseFloat(token); + (*y) = parseFloat(token); + (*z) = parseFloat(token); +} + +static size_t my_strnlen(const char *s, size_t n) { + const char *p = (char *)memchr(s, 0, n); + return p ? (size_t)(p - s) : n; +} + +static char *my_strdup(const char *s, size_t max_length) { + char *d; + size_t len; + + if (s == NULL) return NULL; + + /* Do not consider CRLF line ending(#19) */ + len = length_until_line_feed(s, max_length); + /* len = strlen(s); */ + + /* trim line ending and append '\0' */ + d = (char *)TINYOBJ_MALLOC(len + 1); /* + '\0' */ + memcpy(d, s, (size_t)(len)); + d[len] = '\0'; + + return d; +} + +static char *my_strndup(const char *s, size_t len) { + char *d; + size_t slen; + + if (s == NULL) return NULL; + if (len == 0) return NULL; + + slen = my_strnlen(s, len); + d = (char *)TINYOBJ_MALLOC(slen + 1); /* + '\0' */ + if (!d) { + return NULL; + } + memcpy(d, s, slen); + d[slen] = '\0'; + + return d; +} + +char *dynamic_fgets(char **buf, size_t *size, FILE *file) { + char *offset; + char *ret; + size_t old_size; + + if (!(ret = fgets(*buf, (int)*size, file))) { + return ret; + } + + if (NULL != strchr(*buf, '\n')) { + return ret; + } + + do { + old_size = *size; + *size *= 2; + *buf = (char*)TINYOBJ_REALLOC_SIZED(*buf, old_size, *size); + offset = &((*buf)[old_size - 1]); + + ret = fgets(offset, (int)(old_size + 1), file); + } while(ret && (NULL == strchr(*buf, '\n'))); + + return ret; +} + +static void 
/* Reset a material to its defaults: no name, no textures, black colours,
 * illum 0, and dissolve, shininess and ior all 1. (The "static void" return
 * type sits at the end of the preceding source line.) */
initMaterial(tinyobj_material_t *material) {
  int i;
  material->name = NULL;
  material->ambient_texname = NULL;
  material->diffuse_texname = NULL;
  material->specular_texname = NULL;
  material->specular_highlight_texname = NULL;
  material->bump_texname = NULL;
  material->displacement_texname = NULL;
  material->alpha_texname = NULL;
  for (i = 0; i < 3; i++) {
    material->ambient[i] = 0.f;
    material->diffuse[i] = 0.f;
    material->specular[i] = 0.f;
    material->transmittance[i] = 0.f;
    material->emission[i] = 0.f;
  }
  material->illum = 0;
  material->dissolve = 1.f;
  material->shininess = 1.f;
  material->ior = 1.f;
}

/* Implementation of string to int hashtable.
 *
 * Keys are identified by their djb2 hash only (the string itself is not
 * stored). Every entry that could not be placed in its home slot is linked
 * into a list that starts at the home slot, and lookups walk that list, so an
 * entry may live in any free slot. */

#define HASH_TABLE_ERROR 1
#define HASH_TABLE_SUCCESS 0

#define HASH_TABLE_DEFAULT_SIZE 10

typedef struct hash_table_entry_t
{
  unsigned long hash;
  int filled;
  int pad0;
  long value;

  struct hash_table_entry_t* next;
} hash_table_entry_t;

typedef struct
{
  unsigned long* hashes;       /* hashes of the n stored keys, in insertion order */
  hash_table_entry_t* entries; /* capacity slots */
  size_t capacity;
  size_t n;
} hash_table_t;

/* djb2 string hash (hash * 33 + c, seeded with 5381). */
static unsigned long hash_djb2(const unsigned char* str)
{
  unsigned long hash = 5381;
  int c;

  while ((c = *str++)) {
    hash = ((hash << 5) + hash) + (unsigned long)(c);
  }

  return hash;
}

/* Initialise hash_table with start_capacity slots (HASH_TABLE_DEFAULT_SIZE
 * when 0). When memory cannot be obtained the table is left empty with
 * capacity 0; it is still safe to query, set and destroy. */
static void create_hash_table(size_t start_capacity, hash_table_t* hash_table)
{
  if (start_capacity < 1)
    start_capacity = HASH_TABLE_DEFAULT_SIZE;
  hash_table->hashes = (unsigned long*) TINYOBJ_MALLOC(start_capacity * sizeof(unsigned long));
  hash_table->entries = (hash_table_entry_t*) TINYOBJ_CALLOC(start_capacity, sizeof(hash_table_entry_t));
  hash_table->capacity = start_capacity;
  hash_table->n = 0;

  if (hash_table->hashes == NULL || hash_table->entries == NULL) {
    if (hash_table->hashes) TINYOBJ_FREE(hash_table->hashes);
    if (hash_table->entries) TINYOBJ_FREE(hash_table->entries);
    hash_table->hashes = NULL;
    hash_table->entries = NULL;
    hash_table->capacity = 0;
  }
}

/* Release the storage owned by hash_table. */
static void destroy_hash_table(hash_table_t* hash_table)
{
  if (hash_table->entries) TINYOBJ_FREE(hash_table->entries);
  if (hash_table->hashes) TINYOBJ_FREE(hash_table->hashes);
}

/* Store (hash, value) in a free slot. Slots are tried with quadratic probing
 * first. On capacities that are not prime those probes visit only some of
 * the slots, so when they all turn out to be taken the table is scanned for
 * any free slot. Returns HASH_TABLE_ERROR only when every slot is filled. */
static int hash_table_insert_value(unsigned long hash, long value, hash_table_t* hash_table)
{
  size_t start_index;
  size_t index;
  hash_table_entry_t* start_entry;
  size_t i;
  hash_table_entry_t* entry;

  if (hash_table->capacity == 0)
    return HASH_TABLE_ERROR;

  start_index = hash % hash_table->capacity;
  index = start_index;
  start_entry = hash_table->entries + start_index;

  for (i = 1; hash_table->entries[index].filled; i++)
  {
    if (i >= hash_table->capacity)
      break;
    index = (start_index + (i * i)) % hash_table->capacity;
  }

  if (hash_table->entries[index].filled)
  {
    /* Probing found nothing: take the first free slot, if there is one. */
    for (index = 0; index < hash_table->capacity; index++)
    {
      if (!hash_table->entries[index].filled)
        break;
    }
    if (index == hash_table->capacity)
      return HASH_TABLE_ERROR;
  }

  entry = hash_table->entries + index;
  entry->hash = hash;
  entry->filled = 1;
  entry->value = value;

  if (index != start_index) {
    /* This is a new entry, but not the start entry, hence we need to add a next pointer to our entry */
    entry->next = start_entry->next;
    start_entry->next = entry;
  }

  return HASH_TABLE_SUCCESS;
}

/* Insert a new key and record its hash in the insertion-order list. */
static int hash_table_insert(unsigned long hash, long value, hash_table_t* hash_table)
{
  int ret = hash_table_insert_value(hash, value, hash_table);
  if (ret == HASH_TABLE_SUCCESS)
  {
    hash_table->hashes[hash_table->n] = hash;
    hash_table->n++;
  }
  return ret;
}

/* Return the entry stored for hash, or NULL when there is none. */
static hash_table_entry_t* hash_table_find(unsigned long hash, hash_table_t* hash_table)
{
  hash_table_entry_t* entry;

  if (hash_table->capacity == 0)
    return NULL;

  entry = hash_table->entries + (hash % hash_table->capacity);
  while (entry)
  {
    if (entry->hash == hash && entry->filled)
    {
      return entry;
    }
    entry = entry->next;
  }
  return NULL;
}

/* Make room for new_n keys. Does nothing when they already fit. When memory
 * cannot be obtained the table is left as it was (capacity unchanged). */
static void hash_table_maybe_grow(size_t new_n, hash_table_t* hash_table)
{
  size_t new_capacity;
  hash_table_t new_hash_table;
  unsigned long* new_hashes;
  size_t i;

  if (new_n <= hash_table->capacity) {
    return;
  }
  new_capacity = 2 * ((2 * hash_table->capacity) > new_n ? hash_table->capacity : new_n);

  /* Create a new hash table. We're not calling create_hash_table because we want to realloc the hash array */
  new_hash_table.entries = (hash_table_entry_t*) TINYOBJ_CALLOC(new_capacity, sizeof(hash_table_entry_t));
  if (new_hash_table.entries == NULL) {
    return;
  }
  new_hashes = (unsigned long*) TINYOBJ_REALLOC_SIZED(
    (void*) hash_table->hashes, sizeof(unsigned long) * hash_table->capacity, sizeof(unsigned long) * new_capacity);
  if (new_hashes == NULL) {
    /* The old hash array is untouched by a failed realloc. */
    TINYOBJ_FREE(new_hash_table.entries);
    return;
  }
  hash_table->hashes = new_hashes;
  new_hash_table.hashes = new_hashes;
  new_hash_table.capacity = new_capacity;
  new_hash_table.n = hash_table->n;

  /* Rehash: only the first n hashes have been written. */
  for (i = 0; i < hash_table->n; i++)
  {
    hash_table_entry_t* entry = hash_table_find(hash_table->hashes[i], hash_table);
    if (entry)
    {
      hash_table_insert_value(hash_table->hashes[i], entry->value, &new_hash_table);
    }
  }

  if (hash_table->entries) TINYOBJ_FREE(hash_table->entries);
  (*hash_table) = new_hash_table;
}

/* 1 when a value is stored under name, else 0. */
static int hash_table_exists(const char* name, hash_table_t* hash_table)
{
  return hash_table_find(hash_djb2((const unsigned char*)name), hash_table) != NULL;
}

/* Store val under name, replacing any value already stored for it. The value
 * is dropped only if the table is full and cannot be grown for lack of
 * memory. */
static void hash_table_set(const char* name, size_t val, hash_table_t* hash_table)
{
  /* Hash name */
  unsigned long hash = hash_djb2((const unsigned char *)name);

  hash_table_entry_t* entry = hash_table_find(hash, hash_table);
  if (entry)
  {
    entry->value = (long)val;
    return;
  }

  /* Expand if necessary
   * Grow until the element has been added
   */
  for (;;)
  {
    size_t old_capacity = hash_table->capacity;

    hash_table_maybe_grow(hash_table->n + 1, hash_table);
    if (hash_table_insert(hash, (long)val, hash_table) == HASH_TABLE_SUCCESS)
      return;
    /* Insertion only fails on a full table. If the table did not grow either,
     * retrying cannot succeed, so give up instead of looping forever. */
    if (hash_table->capacity == old_capacity)
      return;
  }
}

/* Value stored under name, or -1 when name is not in the table. */
static long hash_table_get(const char* name, hash_table_t* hash_table)
{
  hash_table_entry_t* ret = hash_table_find(hash_djb2((const unsigned char*)(name)), hash_table);
  if (ret == NULL)
    return -1;
  return ret->value;
}

static tinyobj_material_t *tinyobj_material_add(tinyobj_material_t *prev,
                                                size_t num_materials,
                                                tinyobj_material_t *new_mat) {
  tinyobj_material_t *dst;
  size_t num_bytes = sizeof(tinyobj_material_t) * num_materials;
  dst = (tinyobj_material_t *)TINYOBJ_REALLOC_SIZED(
      prev, num_bytes, num_bytes + /* expression continues on the next source line */
sizeof(tinyobj_material_t)); + + dst[num_materials] = (*new_mat); /* Just copy pointer for char* members */ + return dst; +} + +static int is_line_ending(const char *p, size_t i, size_t end_i) { + if (p[i] == '\0') return 1; + if (p[i] == '\n') return 1; /* this includes \r\n */ + if (p[i] == '\r') { + if (((i + 1) < end_i) && (p[i + 1] != '\n')) { /* detect only \r case */ + return 1; + } + } + return 0; +} + +typedef struct { + size_t pos; + size_t len; +} LineInfo; + +/* Find '\n' and create line data. */ +static int get_line_infos(const char *buf, size_t buf_len, LineInfo **line_infos, size_t *num_lines) +{ + size_t i = 0; + size_t end_idx = buf_len; + size_t prev_pos = 0; + size_t line_no = 0; + size_t last_line_ending = 0; + + /* Count # of lines. */ + for (i = 0; i < end_idx; i++) { + if (is_line_ending(buf, i, end_idx)) { + (*num_lines)++; + last_line_ending = i; + } + } + /* The last char from the input may not be a line + * ending character so add an extra line if there + * are more characters after the last line ending + * that was found. */ + if (end_idx - last_line_ending > 0) { + (*num_lines)++; + } + + if (*num_lines == 0) return TINYOBJ_ERROR_EMPTY; + + *line_infos = (LineInfo *)TINYOBJ_MALLOC(sizeof(LineInfo) * (*num_lines)); + + /* Fill line infos. 
*/ + for (i = 0; i < end_idx; i++) { + if (is_line_ending(buf, i, end_idx)) { + (*line_infos)[line_no].pos = prev_pos; + (*line_infos)[line_no].len = i - prev_pos; + prev_pos = i + 1; + line_no++; + } + } + if (end_idx - last_line_ending > 0) { + (*line_infos)[line_no].pos = prev_pos; + (*line_infos)[line_no].len = end_idx - 1 - last_line_ending; + } + + return 0; +} + +static int tinyobj_parse_and_index_mtl_file(tinyobj_material_t **materials_out, + size_t *num_materials_out, + const char *mtl_filename, const char *obj_filename, file_reader_callback file_reader, void *ctx, + hash_table_t* material_table) { + tinyobj_material_t material; + size_t num_materials = 0; + tinyobj_material_t *materials = NULL; + int has_previous_material = 0; + const char *line_end = NULL; + size_t num_lines = 0; + LineInfo *line_infos = NULL; + size_t i = 0; + char *buf = NULL; + size_t len = 0; + + if (materials_out == NULL) { + return TINYOBJ_ERROR_INVALID_PARAMETER; + } + + if (num_materials_out == NULL) { + return TINYOBJ_ERROR_INVALID_PARAMETER; + } + + (*materials_out) = NULL; + (*num_materials_out) = 0; + + file_reader(ctx, mtl_filename, 1, obj_filename, &buf, &len); + if (len < 1) return TINYOBJ_ERROR_INVALID_PARAMETER; + if (buf == NULL) return TINYOBJ_ERROR_INVALID_PARAMETER; + + if (get_line_infos(buf, len, &line_infos, &num_lines) != 0) { + TINYOBJ_FREE(line_infos); + return TINYOBJ_ERROR_EMPTY; + } + + /* Create a default material */ + initMaterial(&material); + + for (i = 0; i < num_lines; i++) { + const char *p = &buf[line_infos[i].pos]; + size_t p_len = line_infos[i].len; + + char linebuf[4096]; + const char *token; + assert(p_len < 4095); + + memcpy(linebuf, p, p_len); + linebuf[p_len] = '\0'; + + token = linebuf; + line_end = token + p_len; + + /* Skip leading space. 
*/ + token += strspn(token, " \t"); + + assert(token); + if (token[0] == '\0') continue; /* empty line */ + + if (token[0] == '#') continue; /* comment line */ + + /* new mtl */ + if ((0 == strncmp(token, "newmtl", 6)) && IS_SPACE((token[6]))) { + char namebuf[4096]; + + /* flush previous material. */ + if (has_previous_material) { + materials = tinyobj_material_add(materials, num_materials, &material); + num_materials++; + } else { + has_previous_material = 1; + } + + /* initial temporary material */ + initMaterial(&material); + + /* set new mtl name */ + token += 7; +#ifdef _MSC_VER + sscanf_s(token, "%s", namebuf, (unsigned)_countof(namebuf)); +#else + sscanf(token, "%s", namebuf); +#endif + material.name = my_strdup(namebuf, (size_t) (line_end - token)); + + /* Add material to material table */ + if (material_table) + hash_table_set(material.name, num_materials, material_table); + + continue; + } + + /* ambient */ + if (token[0] == 'K' && token[1] == 'a' && IS_SPACE((token[2]))) { + float r, g, b; + token += 2; + parseFloat3(&r, &g, &b, &token); + material.ambient[0] = r; + material.ambient[1] = g; + material.ambient[2] = b; + continue; + } + + /* diffuse */ + if (token[0] == 'K' && token[1] == 'd' && IS_SPACE((token[2]))) { + float r, g, b; + token += 2; + parseFloat3(&r, &g, &b, &token); + material.diffuse[0] = r; + material.diffuse[1] = g; + material.diffuse[2] = b; + continue; + } + + /* specular */ + if (token[0] == 'K' && token[1] == 's' && IS_SPACE((token[2]))) { + float r, g, b; + token += 2; + parseFloat3(&r, &g, &b, &token); + material.specular[0] = r; + material.specular[1] = g; + material.specular[2] = b; + continue; + } + + /* transmittance */ + if (token[0] == 'K' && token[1] == 't' && IS_SPACE((token[2]))) { + float r, g, b; + token += 2; + parseFloat3(&r, &g, &b, &token); + material.transmittance[0] = r; + material.transmittance[1] = g; + material.transmittance[2] = b; + continue; + } + + /* ior(index of refraction) */ + if (token[0] == 'N' && 
token[1] == 'i' && IS_SPACE((token[2]))) { + token += 2; + material.ior = parseFloat(&token); + continue; + } + + /* emission */ + if (token[0] == 'K' && token[1] == 'e' && IS_SPACE(token[2])) { + float r, g, b; + token += 2; + parseFloat3(&r, &g, &b, &token); + material.emission[0] = r; + material.emission[1] = g; + material.emission[2] = b; + continue; + } + + /* shininess */ + if (token[0] == 'N' && token[1] == 's' && IS_SPACE(token[2])) { + token += 2; + material.shininess = parseFloat(&token); + continue; + } + + /* illum model */ + if (0 == strncmp(token, "illum", 5) && IS_SPACE(token[5])) { + token += 6; + material.illum = parseInt(&token); + continue; + } + + /* dissolve */ + if ((token[0] == 'd' && IS_SPACE(token[1]))) { + token += 1; + material.dissolve = parseFloat(&token); + continue; + } + if (token[0] == 'T' && token[1] == 'r' && IS_SPACE(token[2])) { + token += 2; + /* Invert value of Tr(assume Tr is in range [0, 1]) */ + material.dissolve = 1.0f - parseFloat(&token); + continue; + } + + /* ambient texture */ + if ((0 == strncmp(token, "map_Ka", 6)) && IS_SPACE(token[6])) { + token += 7; + material.ambient_texname = my_strdup(token, (size_t) (line_end - token)); + continue; + } + + /* diffuse texture */ + if ((0 == strncmp(token, "map_Kd", 6)) && IS_SPACE(token[6])) { + token += 7; + material.diffuse_texname = my_strdup(token, (size_t) (line_end - token)); + continue; + } + + /* specular texture */ + if ((0 == strncmp(token, "map_Ks", 6)) && IS_SPACE(token[6])) { + token += 7; + material.specular_texname = my_strdup(token, (size_t) (line_end - token)); + continue; + } + + /* specular highlight texture */ + if ((0 == strncmp(token, "map_Ns", 6)) && IS_SPACE(token[6])) { + token += 7; + material.specular_highlight_texname = my_strdup(token, (size_t) (line_end - token)); + continue; + } + + /* bump texture */ + if ((0 == strncmp(token, "map_bump", 8)) && IS_SPACE(token[8])) { + token += 9; + material.bump_texname = my_strdup(token, (size_t) (line_end - 
token)); + continue; + } + + /* alpha texture */ + if ((0 == strncmp(token, "map_d", 5)) && IS_SPACE(token[5])) { + token += 6; + material.alpha_texname = my_strdup(token, (size_t) (line_end - token)); + continue; + } + + /* bump texture */ + if ((0 == strncmp(token, "bump", 4)) && IS_SPACE(token[4])) { + token += 5; + material.bump_texname = my_strdup(token, (size_t) (line_end - token)); + continue; + } + + /* displacement texture */ + if ((0 == strncmp(token, "disp", 4)) && IS_SPACE(token[4])) { + token += 5; + material.displacement_texname = my_strdup(token, (size_t) (line_end - token)); + continue; + } + + /* @todo { unknown parameter } */ + } + + TINYOBJ_FREE(line_infos); + + if (material.name) { + /* Flush last material element */ + materials = tinyobj_material_add(materials, num_materials, &material); + num_materials++; + } + + (*num_materials_out) = num_materials; + (*materials_out) = materials; + + return TINYOBJ_SUCCESS; +} + +int tinyobj_parse_mtl_file(tinyobj_material_t **materials_out, + size_t *num_materials_out, + const char *mtl_filename, const char *obj_filename, file_reader_callback file_reader, + void *ctx) { + return tinyobj_parse_and_index_mtl_file(materials_out, num_materials_out, mtl_filename, obj_filename, file_reader, ctx, NULL); +} + + +typedef enum { + COMMAND_EMPTY, + COMMAND_V, + COMMAND_VN, + COMMAND_VT, + COMMAND_F, + COMMAND_G, + COMMAND_O, + COMMAND_USEMTL, + COMMAND_MTLLIB + +} CommandType; + +typedef struct { + float vx, vy, vz; + float nx, ny, nz; + float tx, ty; + + /* @todo { Use dynamic array } */ + tinyobj_vertex_index_t f[TINYOBJ_MAX_FACES_PER_F_LINE]; + size_t num_f; + + int f_num_verts[TINYOBJ_MAX_FACES_PER_F_LINE]; + size_t num_f_num_verts; + + const char *group_name; + unsigned int group_name_len; + int pad0; + + const char *object_name; + unsigned int object_name_len; + int pad1; + + const char *material_name; + unsigned int material_name_len; + int pad2; + + const char *mtllib_name; + unsigned int mtllib_name_len; + + 
CommandType type;
+} Command;
+
+/*
+ * Parse one line of an .obj file into `command`.
+ *
+ * `p` points at the first byte of the line and `p_len` is its length in
+ * bytes. The line is copied into a local, NUL-terminated buffer before it is
+ * tokenized, so `p` itself does not have to be NUL-terminated. The name
+ * pointers stored for usemtl/mtllib/g/o (material_name, mtllib_name,
+ * group_name, object_name) point back into `p`, not into the local copy, and
+ * are delimited by their matching *_len fields.
+ *
+ * When `triangulate` is non-zero an `f` line is split into a triangle fan
+ * around its first vertex; otherwise the face is stored as a single polygon.
+ *
+ * Returns 1 when a command was recognized and stored, 0 otherwise. Whenever
+ * 0 is returned command->type is COMMAND_EMPTY, which matters because the
+ * caller later walks the command array by type regardless of this return
+ * value.
+ *
+ * Lines that do not fit the fixed-size buffers (line text, vertices per face)
+ * are skipped instead of overflowing them. These limits used to be enforced
+ * by assert() only, i.e. not at all in NDEBUG builds.
+ */
+static int parseLine(Command *command, const char *p, size_t p_len,
+                     int triangulate) {
+  char linebuf[4096];
+  const char *token;
+
+  command->type = COMMAND_EMPTY;
+
+  /* The line plus its NUL terminator must fit in linebuf (same limit as the
+   * former assert(p_len < 4095)). */
+  if (p_len >= sizeof(linebuf) - 1) {
+    return 0;
+  }
+
+  memcpy(linebuf, p, p_len);
+  linebuf[p_len] = '\0';
+
+  token = linebuf;
+
+  /* Skip leading space. */
+  skip_space(&token);
+
+  if (token[0] == '\0') { /* empty line */
+    return 0;
+  }
+
+  if (token[0] == '#') { /* comment line */
+    return 0;
+  }
+
+  /* vertex */
+  if (token[0] == 'v' && IS_SPACE((token[1]))) {
+    float x, y, z;
+    token += 2;
+    parseFloat3(&x, &y, &z, &token);
+    command->vx = x;
+    command->vy = y;
+    command->vz = z;
+    command->type = COMMAND_V;
+    return 1;
+  }
+
+  /* normal */
+  if (token[0] == 'v' && token[1] == 'n' && IS_SPACE((token[2]))) {
+    float x, y, z;
+    token += 3;
+    parseFloat3(&x, &y, &z, &token);
+    command->nx = x;
+    command->ny = y;
+    command->nz = z;
+    command->type = COMMAND_VN;
+    return 1;
+  }
+
+  /* texcoord */
+  if (token[0] == 'v' && token[1] == 't' && IS_SPACE((token[2]))) {
+    float x, y;
+    token += 3;
+    parseFloat2(&x, &y, &token);
+    command->tx = x;
+    command->ty = y;
+    command->type = COMMAND_VT;
+    return 1;
+  }
+
+  /* face */
+  if (token[0] == 'f' && IS_SPACE((token[1]))) {
+    size_t num_f = 0;
+    size_t k;
+
+    tinyobj_vertex_index_t f[TINYOBJ_MAX_FACES_PER_F_LINE];
+    token += 2;
+    skip_space(&token);
+
+    while (!IS_NEW_LINE(token[0])) {
+      tinyobj_vertex_index_t vi;
+
+      /* f[] is full but the line still has vertices: reject the face rather
+       * than write past the end of the local array. */
+      if (num_f >= TINYOBJ_MAX_FACES_PER_F_LINE) {
+        return 0;
+      }
+
+      vi = parseRawTriple(&token);
+      skip_space_and_cr(&token);
+
+      f[num_f] = vi;
+      num_f++;
+    }
+
+    if (triangulate) {
+      size_t n = 0;
+
+      /* A fan over num_f vertices emits (num_f - 2) triangles, i.e.
+       * 3 * (num_f - 2) indices into command->f. Check the real capacity;
+       * the former assert(3 * num_f < MAX) was stricter than necessary, so
+       * using it as a runtime limit would drop faces that release builds
+       * have always handled correctly. */
+      if (num_f > 2 && 3 * (num_f - 2) > TINYOBJ_MAX_FACES_PER_F_LINE) {
+        return 0;
+      }
+
+      /* Triangle k is (f[0], f[k - 1], f[k]). Indexing f[] directly avoids
+       * reading f[0]/f[1] when the face has fewer than two vertices. */
+      for (k = 2; k < num_f; k++) {
+        command->f[3 * n + 0] = f[0];
+        command->f[3 * n + 1] = f[k - 1];
+        command->f[3 * n + 2] = f[k];
+
+        command->f_num_verts[n] = 3;
+        n++;
+      }
+      command->num_f = 3 * n;
+      command->num_f_num_verts = n;
+
+    } else {
+      /* num_f <= TINYOBJ_MAX_FACES_PER_F_LINE is guaranteed by the parse
+       * loop above, so the copy stays inside command->f. */
+      for (k = 0; k < num_f; k++) {
+        command->f[k] = f[k];
+      }
+
+      command->num_f = num_f;
+      command->f_num_verts[0] = (int)num_f;
+      command->num_f_num_verts = 1;
+    }
+
+    /* Only mark the command as a face once it is known to be storable. */
+    command->type = COMMAND_F;
+
+    return 1;
+  }
+
+  /* NOTE(review): usemtl passes "remaining + 1" as the bound to
+   * length_until_newline, while mtllib/g/o pass "remaining" and add 1 to the
+   * result. Left unchanged here; confirm against length_until_newline. */
+
+  /* use mtl */
+  if ((0 == strncmp(token, "usemtl", 6)) && IS_SPACE((token[6]))) {
+    token += 7;
+
+    skip_space(&token);
+    command->material_name = p + (token - linebuf);
+    command->material_name_len = (unsigned int)length_until_newline(
+        token, (p_len - (size_t)(token - linebuf)) + 1);
+    command->type = COMMAND_USEMTL;
+
+    return 1;
+  }
+
+  /* load mtl */
+  if ((0 == strncmp(token, "mtllib", 6)) && IS_SPACE((token[6]))) {
+    /* By specification, `mtllib` should appear only once in .obj */
+    token += 7;
+
+    skip_space(&token);
+    command->mtllib_name = p + (token - linebuf);
+    command->mtllib_name_len = (unsigned int)length_until_newline(
+                                   token, p_len - (size_t)(token - linebuf)) +
+                               1;
+    command->type = COMMAND_MTLLIB;
+
+    return 1;
+  }
+
+  /* group name */
+  if (token[0] == 'g' && IS_SPACE((token[1]))) {
+    /* @todo { multiple group name. } */
+    token += 2;
+
+    command->group_name = p + (token - linebuf);
+    command->group_name_len = (unsigned int)length_until_newline(
+                                  token, p_len - (size_t)(token - linebuf)) +
+                              1;
+    command->type = COMMAND_G;
+
+    return 1;
+  }
+
+  /* object name */
+  if (token[0] == 'o' && IS_SPACE((token[1]))) {
+    /* @todo { multiple object name? } */
+    token += 2;
+
+    command->object_name = p + (token - linebuf);
+    command->object_name_len = (unsigned int)length_until_newline(
+                                   token, p_len - (size_t)(token - linebuf)) +
+                               1;
+    command->type = COMMAND_O;
+
+    return 1;
+  }
+
+  return 0;
+}
+
+static size_t basename_len(const char *filename, size_t filename_length) {
+  /* Count includes NUL terminator. 
*/
+  size_t i;
+#if defined(_WIN32)
+  /* On Windows both '\' and '/' are directory delimiters. */
+  const int backslash_is_delim = 1;
+#else
+  /* On *nix platforms only '/' is reserved by the filesystem. */
+  const int backslash_is_delim = 0;
+#endif
+
+  if (filename == NULL || filename_length == 0) {
+    return 0;
+  }
+
+  /* filename_length counts the NUL terminator, so the last real character is
+   * at index filename_length - 2. Walk backwards from there and stop at the
+   * first delimiter. The index is tested before every read, so nothing in
+   * front of filename[0] is ever touched; the previous loops could read
+   * filename[-1]. */
+  for (i = filename_length - 1; i > 0; i--) {
+    const char c = filename[i - 1];
+    if (c == '/' || (backslash_is_delim && c == '\\')) {
+      /* Characters after the delimiter, plus the NUL terminator. */
+      return filename_length - i;
+    }
+  }
+
+  /* No delimiter: the whole string (including its NUL) is the basename. */
+  return filename_length;
+}
+
+/*
+ * Build the path of a material file that sits next to an OBJ file.
+ *
+ * The directory part of `obj_filename` (everything up to and including the
+ * last directory delimiter) is joined with `mtllib_name`. Both length
+ * arguments count the NUL terminator. The mtllib name in the OBJ file must
+ * therefore be a path relative to the OBJ's directory; an absolute mtllib
+ * name is not supported.
+ *
+ * Returns a NUL-terminated string allocated with TINYOBJ_MALLOC, to be
+ * released with TINYOBJ_FREE, or NULL when an argument is NULL,
+ * `mtllib_name_length` is 0, or the allocation fails.
+ * NOTE(review): the call site in tinyobj_parse_obj does not test the result
+ * for NULL before using it.
+ */
+static char *generate_mtl_filename(const char *obj_filename,
+                                   size_t obj_filename_length,
+                                   const char *mtllib_name,
+                                   size_t mtllib_name_length) {
+  char *mtl_filename;
+  size_t dir_length;
+  size_t obj_basename_length;
+
+  if (obj_filename == NULL || mtllib_name == NULL ||
+      mtllib_name_length == 0) {
+    return NULL;
+  }
+
+  /* Calculate required size of mtl_filename and allocate */
+  obj_basename_length = basename_len(obj_filename, obj_filename_length);
+  dir_length = obj_filename_length - obj_basename_length;
+  mtl_filename = (char *)TINYOBJ_MALLOC(dir_length + mtllib_name_length);
+  if (mtl_filename == NULL) {
+    return NULL;
+  }
+
+  /* Copy over the obj's path */
+  memcpy(mtl_filename, obj_filename, dir_length);
+
+  /* Append the mtllib name in place of the obj basename. strncpy stops
+   * reading at the source's NUL and never writes more than
+   * mtllib_name_length bytes; the terminator is then forced so the result is
+   * a valid string even if the source was longer than announced. */
+  strncpy(mtl_filename + dir_length, mtllib_name, mtllib_name_length);
+  mtl_filename[dir_length + mtllib_name_length - 1] = '\0';
+  return mtl_filename;
+}
+
+int tinyobj_parse_obj(tinyobj_attrib_t *attrib, tinyobj_shape_t **shapes,
+                      size_t *num_shapes, tinyobj_material_t **materials_out,
+                      size_t *num_materials_out, const char *obj_filename,
+                      file_reader_callback file_reader, void *ctx,
+                      unsigned int flags) {
+  LineInfo *line_infos = NULL;
+  Command *commands = NULL;
+  size_t num_lines = 0;
+
+  size_t num_v = 0;
+  size_t num_vn = 0;
+  size_t num_vt = 0;
+  size_t num_f = 0;
+  size_t num_faces = 0;
+
+  int mtllib_line_index = -1;
+
+  tinyobj_material_t *materials = NULL;
+  size_t num_materials = 0;
+
+  hash_table_t material_table;
+
+  char *buf = NULL;
+  size_t len = 0;
+  file_reader(ctx, obj_filename, /* is_mtl */0, obj_filename, &buf, &len);
+
+  if (len < 1) return TINYOBJ_ERROR_INVALID_PARAMETER;
+  if (attrib == NULL) return TINYOBJ_ERROR_INVALID_PARAMETER;
+  if (shapes == NULL) return TINYOBJ_ERROR_INVALID_PARAMETER;
+  if (num_shapes == NULL) return TINYOBJ_ERROR_INVALID_PARAMETER;
+  if (buf == NULL) return TINYOBJ_ERROR_INVALID_PARAMETER;
+  if (materials_out == NULL) return TINYOBJ_ERROR_INVALID_PARAMETER;
+  if (num_materials_out == NULL) return TINYOBJ_ERROR_INVALID_PARAMETER;
+
+  tinyobj_attrib_init(attrib);
+
+  /* 1. 
create line data */ + if (get_line_infos(buf, len, &line_infos, &num_lines) != 0) { + return TINYOBJ_ERROR_EMPTY; + } + + commands = (Command *)TINYOBJ_MALLOC(sizeof(Command) * num_lines); + + create_hash_table(HASH_TABLE_DEFAULT_SIZE, &material_table); + + /* 2. parse each line */ + { + size_t i = 0; + for (i = 0; i < num_lines; i++) { + int ret = parseLine(&commands[i], &buf[line_infos[i].pos], + line_infos[i].len, flags & TINYOBJ_FLAG_TRIANGULATE); + if (ret) { + if (commands[i].type == COMMAND_V) { + num_v++; + } else if (commands[i].type == COMMAND_VN) { + num_vn++; + } else if (commands[i].type == COMMAND_VT) { + num_vt++; + } else if (commands[i].type == COMMAND_F) { + num_f += commands[i].num_f; + num_faces += commands[i].num_f_num_verts; + } + + if (commands[i].type == COMMAND_MTLLIB) { + mtllib_line_index = (int)i; + } + } + } + } + + /* line_infos are not used anymore. Release memory. */ + if (line_infos) { + TINYOBJ_FREE(line_infos); + } + + /* Load material (if it exists) */ + if (mtllib_line_index >= 0 && commands[mtllib_line_index].mtllib_name && + commands[mtllib_line_index].mtllib_name_len > 0) { + /* Maximum length allowed by Linux - higher than Windows and macOS */ + size_t obj_filename_len = my_strnlen(obj_filename, 4096 + 255) + 1; + char *mtl_filename; + char *mtllib_name; + size_t mtllib_name_len = 0; + int ret; + + mtllib_name_len = length_until_line_feed(commands[mtllib_line_index].mtllib_name, + commands[mtllib_line_index].mtllib_name_len); + + mtllib_name = my_strndup(commands[mtllib_line_index].mtllib_name, + mtllib_name_len); + + /* allow for NUL terminator */ + mtllib_name_len++; + mtl_filename = generate_mtl_filename(obj_filename, obj_filename_len, + mtllib_name, mtllib_name_len); + + ret = tinyobj_parse_and_index_mtl_file(&materials, &num_materials, + mtl_filename, obj_filename, + file_reader, ctx, + &material_table); + + if (ret != TINYOBJ_SUCCESS) { + /* warning. 
*/ + fprintf(stderr, "TINYOBJ: Failed to parse material file '%s': %d\n", mtl_filename, ret); + } + TINYOBJ_FREE(mtl_filename); + TINYOBJ_FREE(mtllib_name); + } + + /* Construct attributes */ + + { + size_t v_count = 0; + size_t n_count = 0; + size_t t_count = 0; + size_t f_count = 0; + size_t face_count = 0; + int material_id = -1; /* -1 = default unknown material. */ + size_t i = 0; + + attrib->vertices = (float *)TINYOBJ_MALLOC(sizeof(float) * num_v * 3); + attrib->num_vertices = (unsigned int)num_v; + attrib->normals = (float *)TINYOBJ_MALLOC(sizeof(float) * num_vn * 3); + attrib->num_normals = (unsigned int)num_vn; + attrib->texcoords = (float *)TINYOBJ_MALLOC(sizeof(float) * num_vt * 2); + attrib->num_texcoords = (unsigned int)num_vt; + attrib->faces = (tinyobj_vertex_index_t *)TINYOBJ_MALLOC( + sizeof(tinyobj_vertex_index_t) * num_f); + attrib->num_faces = (unsigned int)num_f; + attrib->face_num_verts = (int *)TINYOBJ_MALLOC(sizeof(int) * num_faces); + attrib->material_ids = (int *)TINYOBJ_MALLOC(sizeof(int) * num_faces); + attrib->num_face_num_verts = (unsigned int)num_faces; + + for (i = 0; i < num_lines; i++) { + if (commands[i].type == COMMAND_EMPTY) { + continue; + } else if (commands[i].type == COMMAND_USEMTL) { + /* @todo + if (commands[t][i].material_name && + commands[t][i].material_name_len > 0) { + std::string material_name(commands[t][i].material_name, + commands[t][i].material_name_len); + + if (material_map.find(material_name) != material_map.end()) { + material_id = material_map[material_name]; + } else { + // Assign invalid material ID + material_id = -1; + } + } + */ + if (commands[i].material_name && + commands[i].material_name_len >0) + { + /* Create a null terminated string */ + char* material_name_null_term = (char*) TINYOBJ_MALLOC(commands[i].material_name_len + 1); + memcpy((void*) material_name_null_term, (const void*) commands[i].material_name, commands[i].material_name_len); + material_name_null_term[commands[i].material_name_len] = 
0; + + if (hash_table_exists(material_name_null_term, &material_table)) + material_id = (int)hash_table_get(material_name_null_term, &material_table); + else + material_id = -1; + + TINYOBJ_FREE(material_name_null_term); + } + } else if (commands[i].type == COMMAND_V) { + attrib->vertices[3 * v_count + 0] = commands[i].vx; + attrib->vertices[3 * v_count + 1] = commands[i].vy; + attrib->vertices[3 * v_count + 2] = commands[i].vz; + v_count++; + } else if (commands[i].type == COMMAND_VN) { + attrib->normals[3 * n_count + 0] = commands[i].nx; + attrib->normals[3 * n_count + 1] = commands[i].ny; + attrib->normals[3 * n_count + 2] = commands[i].nz; + n_count++; + } else if (commands[i].type == COMMAND_VT) { + attrib->texcoords[2 * t_count + 0] = commands[i].tx; + attrib->texcoords[2 * t_count + 1] = commands[i].ty; + t_count++; + } else if (commands[i].type == COMMAND_F) { + size_t k = 0; + for (k = 0; k < commands[i].num_f; k++) { + tinyobj_vertex_index_t vi = commands[i].f[k]; + int v_idx = fixIndex(vi.v_idx, v_count); + int vn_idx = fixIndex(vi.vn_idx, n_count); + int vt_idx = fixIndex(vi.vt_idx, t_count); + attrib->faces[f_count + k].v_idx = v_idx; + attrib->faces[f_count + k].vn_idx = vn_idx; + attrib->faces[f_count + k].vt_idx = vt_idx; + } + + for (k = 0; k < commands[i].num_f_num_verts; k++) { + attrib->material_ids[face_count + k] = material_id; + attrib->face_num_verts[face_count + k] = commands[i].f_num_verts[k]; + } + + f_count += commands[i].num_f; + face_count += commands[i].num_f_num_verts; + } + } + } + + /* 5. Construct shape information. 
*/ + { + unsigned int face_count = 0; + size_t i = 0; + size_t n = 0; + size_t shape_idx = 0; + + const char *shape_name = NULL; + unsigned int shape_name_len = 0; + const char *prev_shape_name = NULL; + unsigned int prev_shape_name_len = 0; + unsigned int prev_shape_face_offset = 0; + unsigned int prev_face_offset = 0; + tinyobj_shape_t prev_shape = {NULL, 0, 0}; + + /* Find the number of shapes in .obj */ + for (i = 0; i < num_lines; i++) { + if (commands[i].type == COMMAND_O || commands[i].type == COMMAND_G) { + n++; + } + } + + /* Allocate array of shapes with maximum possible size(+1 for unnamed + * group/object). + * Actual # of shapes found in .obj is determined in the later */ + (*shapes) = (tinyobj_shape_t*)TINYOBJ_MALLOC(sizeof(tinyobj_shape_t) * (n + 1)); + + for (i = 0; i < num_lines; i++) { + if (commands[i].type == COMMAND_O || commands[i].type == COMMAND_G) { + if (commands[i].type == COMMAND_O) { + shape_name = commands[i].object_name; + shape_name_len = commands[i].object_name_len; + } else { + shape_name = commands[i].group_name; + shape_name_len = commands[i].group_name_len; + } + + if (face_count == 0) { + /* 'o' or 'g' appears before any 'f' */ + prev_shape_name = shape_name; + prev_shape_name_len = shape_name_len; + prev_shape_face_offset = face_count; + prev_face_offset = face_count; + } else { + if (shape_idx == 0) { + /* 'o' or 'g' after some 'v' lines. 
*/ + (*shapes)[shape_idx].name = my_strndup( + prev_shape_name, prev_shape_name_len); /* may be NULL */ + (*shapes)[shape_idx].face_offset = prev_shape.face_offset; + (*shapes)[shape_idx].length = face_count - prev_face_offset; + shape_idx++; + + prev_face_offset = face_count; + + } else { + if ((face_count - prev_face_offset) > 0) { + (*shapes)[shape_idx].name = + my_strndup(prev_shape_name, prev_shape_name_len); + (*shapes)[shape_idx].face_offset = prev_face_offset; + (*shapes)[shape_idx].length = face_count - prev_face_offset; + shape_idx++; + prev_face_offset = face_count; + } + } + + /* Record shape info for succeeding 'o' or 'g' command. */ + prev_shape_name = shape_name; + prev_shape_name_len = shape_name_len; + prev_shape_face_offset = face_count; + } + } + if (commands[i].type == COMMAND_F) { + face_count++; + } + } + + if ((face_count - prev_face_offset) > 0) { + size_t length = face_count - prev_shape_face_offset; + if (length > 0) { + (*shapes)[shape_idx].name = + my_strndup(prev_shape_name, prev_shape_name_len); + (*shapes)[shape_idx].face_offset = prev_face_offset; + (*shapes)[shape_idx].length = face_count - prev_face_offset; + shape_idx++; + } + } else { + /* Guess no 'v' line occurrence after 'o' or 'g', so discards current + * shape information. 
*/ + } + + (*num_shapes) = shape_idx; + } + + if (commands) { + TINYOBJ_FREE(commands); + } + + destroy_hash_table(&material_table); + + (*materials_out) = materials; + (*num_materials_out) = num_materials; + + return TINYOBJ_SUCCESS; +} + +void tinyobj_attrib_init(tinyobj_attrib_t *attrib) { + attrib->vertices = NULL; + attrib->num_vertices = 0; + attrib->normals = NULL; + attrib->num_normals = 0; + attrib->texcoords = NULL; + attrib->num_texcoords = 0; + attrib->faces = NULL; + attrib->num_faces = 0; + attrib->face_num_verts = NULL; + attrib->num_face_num_verts = 0; + attrib->material_ids = NULL; +} + +void tinyobj_attrib_free(tinyobj_attrib_t *attrib) { + if (attrib->vertices) TINYOBJ_FREE(attrib->vertices); + if (attrib->normals) TINYOBJ_FREE(attrib->normals); + if (attrib->texcoords) TINYOBJ_FREE(attrib->texcoords); + if (attrib->faces) TINYOBJ_FREE(attrib->faces); + if (attrib->face_num_verts) TINYOBJ_FREE(attrib->face_num_verts); + if (attrib->material_ids) TINYOBJ_FREE(attrib->material_ids); +} + +void tinyobj_shapes_free(tinyobj_shape_t *shapes, size_t num_shapes) { + size_t i; + if (shapes == NULL) return; + + for (i = 0; i < num_shapes; i++) { + if (shapes[i].name) TINYOBJ_FREE(shapes[i].name); + } + + TINYOBJ_FREE(shapes); +} + +void tinyobj_materials_free(tinyobj_material_t *materials, + size_t num_materials) { + size_t i; + if (materials == NULL) return; + + for (i = 0; i < num_materials; i++) { + if (materials[i].name) TINYOBJ_FREE(materials[i].name); + if (materials[i].ambient_texname) TINYOBJ_FREE(materials[i].ambient_texname); + if (materials[i].diffuse_texname) TINYOBJ_FREE(materials[i].diffuse_texname); + if (materials[i].specular_texname) TINYOBJ_FREE(materials[i].specular_texname); + if (materials[i].specular_highlight_texname) + TINYOBJ_FREE(materials[i].specular_highlight_texname); + if (materials[i].bump_texname) TINYOBJ_FREE(materials[i].bump_texname); + if (materials[i].displacement_texname) + 
TINYOBJ_FREE(materials[i].displacement_texname); + if (materials[i].alpha_texname) TINYOBJ_FREE(materials[i].alpha_texname); + } + + TINYOBJ_FREE(materials); +} +#endif /* TINYOBJ_LOADER_C_IMPLEMENTATION */ + +#endif /* TINOBJ_LOADER_C_H_ */ diff --git a/lib/zmath/README.md b/lib/zmath/README.md new file mode 100644 index 0000000..c11ef1f --- /dev/null +++ b/lib/zmath/README.md @@ -0,0 +1,138 @@ +# zmath v0.9.6 - SIMD math library for game developers + +Tested on x86_64 and AArch64. + +Provides ~140 optimized routines and ~70 extensive tests. + +Can be used with any graphics API. + +Documentation can be found [here](https://github.com/michal-z/zig-gamedev/blob/main/libs/zmath/src/zmath.zig). + +Benchmarks can be found [here](https://github.com/michal-z/zig-gamedev/blob/main/libs/zmath/src/benchmark.zig). + +An intro article can be found [here](https://zig.news/michalz/fast-multi-platform-simd-math-library-in-zig-2adn). + +## Getting started + +Copy `zmath` folder to a `libs` subdirectory of the root of your project. + +Then in your `build.zig` add: + +```zig +const std = @import("std"); +const zmath = @import("libs/zmath/build.zig"); + +pub fn build(b: *std.Build) void { + ... 
+ const optimize = b.standardOptimizeOption(.{}); + const target = b.standardTargetOptions(.{}); + + const zmath_pkg = zmath.package(b, target, optimize, .{ + .options = .{ .enable_cross_platform_determinism = true }, + }); + + zmath_pkg.link(exe); +} +``` + +Now in your code you may import and use zmath: + +```zig +const zm = @import("zmath"); + +pub fn main() !void { + // + // OpenGL/Vulkan example + // + const object_to_world = zm.rotationY(..); + const world_to_view = zm.lookAtRh( + zm.f32x4(3.0, 3.0, 3.0, 1.0), // eye position + zm.f32x4(0.0, 0.0, 0.0, 1.0), // focus point + zm.f32x4(0.0, 1.0, 0.0, 0.0), // up direction ('w' coord is zero because this is a vector not a point) + ); + // `perspectiveFovRhGl` produces Z values in [-1.0, 1.0] range (Vulkan app should use `perspectiveFovRh`) + const view_to_clip = zm.perspectiveFovRhGl(0.25 * math.pi, aspect_ratio, 0.1, 20.0); + + const object_to_view = zm.mul(object_to_world, world_to_view); + const object_to_clip = zm.mul(object_to_view, view_to_clip); + + // Transposition is needed because GLSL uses column-major matrices by default + gl.uniformMatrix4fv(0, 1, gl.TRUE, zm.arrNPtr(&object_to_clip)); + + // In GLSL: gl_Position = vec4(in_position, 1.0) * object_to_clip; + + // + // DirectX example + // + const object_to_world = zm.rotationY(..); + const world_to_view = zm.lookAtLh( + zm.f32x4(3.0, 3.0, -3.0, 1.0), // eye position + zm.f32x4(0.0, 0.0, 0.0, 1.0), // focus point + zm.f32x4(0.0, 1.0, 0.0, 0.0), // up direction ('w' coord is zero because this is a vector not a point) + ); + const view_to_clip = zm.perspectiveFovLh(0.25 * math.pi, aspect_ratio, 0.1, 20.0); + + const object_to_view = zm.mul(object_to_world, world_to_view); + const object_to_clip = zm.mul(object_to_view, view_to_clip); + + // Transposition is needed because HLSL uses column-major matrices by default + const mem = allocateUploadMemory(...); + zm.storeMat(mem, zm.transpose(object_to_clip)); + + // In HLSL: out_position_sv = mul(float4(in_position, 
1.0), object_to_clip); + + // + // 'WASD' camera movement example + // + { + const speed = zm.f32x4s(10.0); + const delta_time = zm.f32x4s(demo.frame_stats.delta_time); + const transform = zm.mul(zm.rotationX(demo.camera.pitch), zm.rotationY(demo.camera.yaw)); + var forward = zm.normalize3(zm.mul(zm.f32x4(0.0, 0.0, 1.0, 0.0), transform)); + + zm.storeArr3(&demo.camera.forward, forward); + + const right = speed * delta_time * zm.normalize3(zm.cross3(zm.f32x4(0.0, 1.0, 0.0, 0.0), forward)); + forward = speed * delta_time * forward; + + var cam_pos = zm.loadArr3(demo.camera.position); + + if (keyDown('W')) { + cam_pos += forward; + } else if (keyDown('S')) { + cam_pos -= forward; + } + if (keyDown('D')) { + cam_pos += right; + } else if (keyDown('A')) { + cam_pos -= right; + } + + zm.storeArr3(&demo.camera.position, cam_pos); + } + + // + // SIMD wave equation solver example (works with vector width 4, 8 and 16) + // 'T' can be F32x4, F32x8 or F32x16 + // + var z_index: i32 = 0; + while (z_index < grid_size) : (z_index += 1) { + const z = scale * @intToFloat(f32, z_index - grid_size / 2); + const vz = zm.splat(T, z); + + var x_index: i32 = 0; + while (x_index < grid_size) : (x_index += zm.veclen(T)) { + const x = scale * @intToFloat(f32, x_index - grid_size / 2); + const vx = zm.splat(T, x) + voffset * zm.splat(T, scale); + + const d = zm.sqrt(vx * vx + vz * vz); + const vy = zm.sin(d - vtime); + + const index = @intCast(usize, x_index + z_index * grid_size); + zm.store(xslice[index..], vx, 0); + zm.store(yslice[index..], vy, 0); + zm.store(zslice[index..], vz, 0); + } + } +} +``` diff --git a/lib/zmath/build.zig b/lib/zmath/build.zig new file mode 100644 index 0000000..ce21bc1 --- /dev/null +++ b/lib/zmath/build.zig @@ -0,0 +1,97 @@ +const std = @import("std"); + +pub const Options = struct { + enable_cross_platform_determinism: bool = true, +}; + +pub const Package = struct { + options: Options, + zmath: *std.Build.Module, + zmath_options: *std.Build.Module, + + pub 
fn link(pkg: Package, exe: *std.Build.CompileStep) void { + exe.addModule("zmath", pkg.zmath); + exe.addModule("zmath_options", pkg.zmath_options); + } +}; + +pub fn package( + b: *std.Build, + _: std.zig.CrossTarget, + _: std.builtin.Mode, + args: struct { + options: Options = .{}, + }, +) Package { + const step = b.addOptions(); + step.addOption( + bool, + "enable_cross_platform_determinism", + args.options.enable_cross_platform_determinism, + ); + + const zmath_options = step.createModule(); + + const zmath = b.createModule(.{ + .source_file = .{ .path = thisDir() ++ "/src/main.zig" }, + .dependencies = &.{ + .{ .name = "zmath_options", .module = zmath_options }, + }, + }); + + return .{ + .options = args.options, + .zmath = zmath, + .zmath_options = zmath_options, + }; +} + +pub fn build(b: *std.Build) void { + const optimize = b.standardOptimizeOption(.{}); + const target = b.standardTargetOptions(.{}); + + const test_step = b.step("test", "Run zmath tests"); + test_step.dependOn(runTests(b, optimize, target)); + + const benchmark_step = b.step("benchmark", "Run zmath benchmarks"); + benchmark_step.dependOn(runBenchmarks(b, target)); +} + +pub fn runTests( + b: *std.Build, + optimize: std.builtin.Mode, + target: std.zig.CrossTarget, +) *std.Build.Step { + const tests = b.addTest(.{ + .name = "zmath-tests", + .root_source_file = .{ .path = thisDir() ++ "/src/main.zig" }, + .target = target, + .optimize = optimize, + }); + + const zmath_pkg = package(b, target, optimize, .{}); + tests.addModule("zmath_options", zmath_pkg.zmath_options); + + return &tests.run().step; +} + +pub fn runBenchmarks( + b: *std.Build, + target: std.zig.CrossTarget, +) *std.Build.Step { + const exe = b.addExecutable(.{ + .name = "zmath-benchmarks", + .root_source_file = .{ .path = thisDir() ++ "/src/benchmark.zig" }, + .target = target, + .optimize = .ReleaseFast, + }); + + const zmath_pkg = package(b, target, .ReleaseFast, .{}); + exe.addModule("zmath", zmath_pkg.zmath); + + return 
&exe.run().step; +} + +inline fn thisDir() []const u8 { + return comptime std.fs.path.dirname(@src().file) orelse "."; +} diff --git a/lib/zmath/src/benchmark.zig b/lib/zmath/src/benchmark.zig new file mode 100644 index 0000000..136e29d --- /dev/null +++ b/lib/zmath/src/benchmark.zig @@ -0,0 +1,469 @@ +// ------------------------------------------------------------------------------------------------- +// zmath - benchmarks +// ------------------------------------------------------------------------------------------------- +// 'zig build benchmark' in the root project directory will build and run 'ReleaseFast' configuration. +// +// ------------------------------------------------------------------------------------------------- +// 'AMD Ryzen 9 3950X 16-Core Processor', Windows 11, Zig 0.10.0-dev.2620+0e9458a3f +// ------------------------------------------------------------------------------------------------- +// matrix mul benchmark (AOS) - scalar version: 1.5880s, zmath version: 1.0642s +// cross3, scale, bias benchmark (AOS) - scalar version: 0.9318s, zmath version: 0.6888s +// cross3, dot3, scale, bias benchmark (AOS) - scalar version: 1.2258s, zmath version: 1.1095s +// quaternion mul benchmark (AOS) - scalar version: 1.4123s, zmath version: 0.6958s +// wave benchmark (SOA) - scalar version: 4.8165s, zmath version: 0.7338s +// +// ------------------------------------------------------------------------------------------------- +// 'AMD Ryzen 7 5800X 8-Core Processor', Linux 5.17.14, Zig 0.10.0-dev.2624+d506275a0 +// ------------------------------------------------------------------------------------------------- +// matrix mul benchmark (AOS) - scalar version: 1.3672s, zmath version: 0.8617s +// cross3, scale, bias benchmark (AOS) - scalar version: 0.6586s, zmath version: 0.4803s +// cross3, dot3, scale, bias benchmark (AOS) - scalar version: 1.0620s, zmath version: 0.8942s +// quaternion mul benchmark (AOS) - scalar version: 1.1324s, zmath version: 
0.6064s +// wave benchmark (SOA) - scalar version: 3.6598s, zmath version: 0.4231s +// +// ------------------------------------------------------------------------------------------------- +// 'Apple M1 Max', macOS Version 12.4, Zig 0.10.0-dev.2657+74442f350 +// ------------------------------------------------------------------------------------------------- +// matrix mul benchmark (AOS) - scalar version: 1.0297s, zmath version: 1.0538s +// cross3, scale, bias benchmark (AOS) - scalar version: 0.6294s, zmath version: 0.6532s +// cross3, dot3, scale, bias benchmark (AOS) - scalar version: 0.9807s, zmath version: 1.0988s +// quaternion mul benchmark (AOS) - scalar version: 1.5413s, zmath version: 0.7800s +// wave benchmark (SOA) - scalar version: 3.4220s, zmath version: 1.0255s +// +// ------------------------------------------------------------------------------------------------- +// '11th Gen Intel(R) Core(TM) i7-11800H @ 2.30GHz', Windows 11, Zig 0.10.0-dev.2620+0e9458a3f +// ------------------------------------------------------------------------------------------------- +// matrix mul benchmark (AOS) - scalar version: 2.2308s, zmath version: 0.9376s +// cross3, scale, bias benchmark (AOS) - scalar version: 1.0821s, zmath version: 0.5110s +// cross3, dot3, scale, bias benchmark (AOS) - scalar version: 1.6580s, zmath version: 0.9167s +// quaternion mul benchmark (AOS) - scalar version: 2.0139s, zmath version: 0.5856s +// wave benchmark (SOA) - scalar version: 3.7832s, zmath version: 0.3642s +// +// ------------------------------------------------------------------------------------------------- + +pub fn main() !void { + var gpa = std.heap.GeneralPurposeAllocator(.{}){}; + defer _ = gpa.deinit(); + const allocator = gpa.allocator(); + + // m = mul(ma, mb); data set fits in L1 cache; AOS data layout. + try mat4MulBenchmark(allocator, 100_000); + + // v = 0.01 * cross3(va, vb) + vec3(1.0); data set fits in L1 cache; AOS data layout. 
+ try cross3ScaleBiasBenchmark(allocator, 10_000); + + // v = dot3(va, vb) * (0.1 * cross3(va, vb) + vec3(1.0)); data set fits in L1 cache; AOS data layout. + try cross3Dot3ScaleBiasBenchmark(allocator, 10_000); + + // q = qmul(qa, qb); data set fits in L1 cache; AOS data layout. + try quatBenchmark(allocator, 10_000); + + // d = sqrt(x * x + z * z); y = sin(d - t); SOA layout. + try waveBenchmark(allocator, 1_000); +} + +const std = @import("std"); +const time = std.time; +const Timer = time.Timer; +const zm = @import("zmath"); + +var prng = std.rand.DefaultPrng.init(0); +const random = prng.random(); + +noinline fn mat4MulBenchmark(allocator: std.mem.Allocator, comptime count: comptime_int) !void { + std.debug.print("\n", .{}); + std.debug.print("{s:>42} - ", .{"matrix mul benchmark (AOS)"}); + + var data0 = std.ArrayList([16]f32).init(allocator); + defer data0.deinit(); + var data1 = std.ArrayList([16]f32).init(allocator); + defer data1.deinit(); + + var i: usize = 0; + while (i < 64) : (i += 1) { + try data0.append([16]f32{ + random.float(f32), random.float(f32), random.float(f32), random.float(f32), + random.float(f32), random.float(f32), random.float(f32), random.float(f32), + random.float(f32), random.float(f32), random.float(f32), random.float(f32), + random.float(f32), random.float(f32), random.float(f32), random.float(f32), + }); + try data1.append([16]f32{ + random.float(f32), random.float(f32), random.float(f32), random.float(f32), + random.float(f32), random.float(f32), random.float(f32), random.float(f32), + random.float(f32), random.float(f32), random.float(f32), random.float(f32), + random.float(f32), random.float(f32), random.float(f32), random.float(f32), + }); + } + + // Warmup, fills L1 cache. 
+ i = 0; + while (i < 100) : (i += 1) { + for (data1.items) |b| { + for (data0.items) |a| { + const ma = zm.loadMat(a[0..]); + const mb = zm.loadMat(b[0..]); + const r = zm.mul(ma, mb); + std.mem.doNotOptimizeAway(&r); + } + } + } + + { + i = 0; + var timer = try Timer.start(); + const start = timer.lap(); + while (i < count) : (i += 1) { + for (data1.items) |b| { + for (data0.items) |a| { + const r = [16]f32{ + a[0] * b[0] + a[1] * b[4] + a[2] * b[8] + a[3] * b[12], + a[0] * b[1] + a[1] * b[5] + a[2] * b[9] + a[3] * b[13], + a[0] * b[2] + a[1] * b[6] + a[2] * b[10] + a[3] * b[14], + a[0] * b[3] + a[1] * b[7] + a[2] * b[11] + a[3] * b[15], + a[4] * b[0] + a[5] * b[4] + a[6] * b[8] + a[7] * b[12], + a[4] * b[1] + a[5] * b[5] + a[6] * b[9] + a[7] * b[13], + a[4] * b[2] + a[5] * b[6] + a[6] * b[10] + a[7] * b[14], + a[4] * b[3] + a[5] * b[7] + a[6] * b[11] + a[7] * b[15], + a[8] * b[0] + a[9] * b[4] + a[10] * b[8] + a[11] * b[12], + a[8] * b[1] + a[9] * b[5] + a[10] * b[9] + a[11] * b[13], + a[8] * b[2] + a[9] * b[6] + a[10] * b[10] + a[11] * b[14], + a[8] * b[3] + a[9] * b[7] + a[10] * b[11] + a[11] * b[15], + a[12] * b[0] + a[13] * b[4] + a[14] * b[8] + a[15] * b[12], + a[12] * b[1] + a[13] * b[5] + a[14] * b[9] + a[15] * b[13], + a[12] * b[2] + a[13] * b[6] + a[14] * b[10] + a[15] * b[14], + a[12] * b[3] + a[13] * b[7] + a[14] * b[11] + a[15] * b[15], + }; + std.mem.doNotOptimizeAway(&r); + } + } + } + const end = timer.read(); + const elapsed_s = @intToFloat(f64, end - start) / time.ns_per_s; + + std.debug.print("scalar version: {d:.4}s, ", .{elapsed_s}); + } + + { + i = 0; + var timer = try Timer.start(); + const start = timer.lap(); + while (i < count) : (i += 1) { + for (data1.items) |b| { + for (data0.items) |a| { + const ma = zm.loadMat(a[0..]); + const mb = zm.loadMat(b[0..]); + const r = zm.mul(ma, mb); + std.mem.doNotOptimizeAway(&r); + } + } + } + const end = timer.read(); + const elapsed_s = @intToFloat(f64, end - start) / time.ns_per_s; + + 
std.debug.print("zmath version: {d:.4}s\n", .{elapsed_s}); + } +} + +noinline fn cross3ScaleBiasBenchmark(allocator: std.mem.Allocator, comptime count: comptime_int) !void { + std.debug.print("{s:>42} - ", .{"cross3, scale, bias benchmark (AOS)"}); + + var data0 = std.ArrayList([3]f32).init(allocator); + defer data0.deinit(); + var data1 = std.ArrayList([3]f32).init(allocator); + defer data1.deinit(); + + var i: usize = 0; + while (i < 256) : (i += 1) { + try data0.append([3]f32{ random.float(f32), random.float(f32), random.float(f32) }); + try data1.append([3]f32{ random.float(f32), random.float(f32), random.float(f32) }); + } + + // Warmup, fills L1 cache. + i = 0; + while (i < 100) : (i += 1) { + for (data1.items) |b| { + for (data0.items) |a| { + const va = zm.loadArr3(a); + const vb = zm.loadArr3(b); + const cp = zm.f32x4s(0.01) * zm.cross3(va, vb) + zm.f32x4s(1.0); + std.mem.doNotOptimizeAway(&cp); + } + } + } + + { + i = 0; + var timer = try Timer.start(); + const start = timer.lap(); + while (i < count) : (i += 1) { + for (data1.items) |b| { + for (data0.items) |a| { + const r = [3]f32{ + 0.01 * (a[1] * b[2] - a[2] * b[1]) + 1.0, + 0.01 * (a[2] * b[0] - a[0] * b[2]) + 1.0, + 0.01 * (a[0] * b[1] - a[1] * b[0]) + 1.0, + }; + std.mem.doNotOptimizeAway(&r); + } + } + } + const end = timer.read(); + const elapsed_s = @intToFloat(f64, end - start) / time.ns_per_s; + + std.debug.print("scalar version: {d:.4}s, ", .{elapsed_s}); + } + + { + i = 0; + var timer = try Timer.start(); + const start = timer.lap(); + while (i < count) : (i += 1) { + for (data1.items) |b| { + for (data0.items) |a| { + const va = zm.loadArr3(a); + const vb = zm.loadArr3(b); + const cp = zm.f32x4s(0.01) * zm.cross3(va, vb) + zm.f32x4s(1.0); + std.mem.doNotOptimizeAway(&cp); + } + } + } + const end = timer.read(); + const elapsed_s = @intToFloat(f64, end - start) / time.ns_per_s; + + std.debug.print("zmath version: {d:.4}s\n", .{elapsed_s}); + } +} + +noinline fn 
cross3Dot3ScaleBiasBenchmark(allocator: std.mem.Allocator, comptime count: comptime_int) !void { + std.debug.print("{s:>42} - ", .{"cross3, dot3, scale, bias benchmark (AOS)"}); + + var data0 = std.ArrayList([3]f32).init(allocator); + defer data0.deinit(); + var data1 = std.ArrayList([3]f32).init(allocator); + defer data1.deinit(); + + var i: usize = 0; + while (i < 256) : (i += 1) { + try data0.append([3]f32{ random.float(f32), random.float(f32), random.float(f32) }); + try data1.append([3]f32{ random.float(f32), random.float(f32), random.float(f32) }); + } + + // Warmup, fills L1 cache. + i = 0; + while (i < 100) : (i += 1) { + for (data1.items) |b| { + for (data0.items) |a| { + const va = zm.loadArr3(a); + const vb = zm.loadArr3(b); + const r = (zm.dot3(va, vb) * (zm.f32x4s(0.1) * zm.cross3(va, vb) + zm.f32x4s(1.0)))[0]; + std.mem.doNotOptimizeAway(&r); + } + } + } + + { + i = 0; + var timer = try Timer.start(); + const start = timer.lap(); + while (i < count) : (i += 1) { + for (data1.items) |b| { + for (data0.items) |a| { + const d = a[0] * b[0] + a[1] * b[1] + a[2] * b[2]; + const r = [3]f32{ + d * (0.1 * (a[1] * b[2] - a[2] * b[1]) + 1.0), + d * (0.1 * (a[2] * b[0] - a[0] * b[2]) + 1.0), + d * (0.1 * (a[0] * b[1] - a[1] * b[0]) + 1.0), + }; + std.mem.doNotOptimizeAway(&r); + } + } + } + const end = timer.read(); + const elapsed_s = @intToFloat(f64, end - start) / time.ns_per_s; + + std.debug.print("scalar version: {d:.4}s, ", .{elapsed_s}); + } + + { + i = 0; + var timer = try Timer.start(); + const start = timer.lap(); + while (i < count) : (i += 1) { + for (data1.items) |b| { + for (data0.items) |a| { + const va = zm.loadArr3(a); + const vb = zm.loadArr3(b); + const r = zm.dot3(va, vb) * (zm.f32x4s(0.1) * zm.cross3(va, vb) + zm.f32x4s(1.0)); + std.mem.doNotOptimizeAway(&r); + } + } + } + const end = timer.read(); + const elapsed_s = @intToFloat(f64, end - start) / time.ns_per_s; + + std.debug.print("zmath version: {d:.4}s\n", .{elapsed_s}); + } +} + 
+noinline fn quatBenchmark(allocator: std.mem.Allocator, comptime count: comptime_int) !void { // Prints timings for quaternion products over two 256-entry arrays of random values: hand-written scalar code vs zm.qmul; each timed section repeats the 256x256 sweep count times. + std.debug.print("{s:>42} - ", .{"quaternion mul benchmark (AOS)"}); + + var data0 = std.ArrayList([4]f32).init(allocator); + defer data0.deinit(); + var data1 = std.ArrayList([4]f32).init(allocator); + defer data1.deinit(); + + var i: usize = 0; + while (i < 256) : (i += 1) { + try data0.append([4]f32{ random.float(f32), random.float(f32), random.float(f32), random.float(f32) }); + try data1.append([4]f32{ random.float(f32), random.float(f32), random.float(f32), random.float(f32) }); + } + + // Warmup, fills L1 cache. + i = 0; + while (i < 100) : (i += 1) { + for (data1.items) |b| { + for (data0.items) |a| { + const va = zm.loadArr4(a); + const vb = zm.loadArr4(b); + const r = zm.qmul(va, vb); + std.mem.doNotOptimizeAway(&r); + } + } + } + + { // Timed section 1: hand-written scalar formula. + i = 0; + var timer = try Timer.start(); + const start = timer.lap(); + while (i < count) : (i += 1) { + for (data1.items) |b| { + for (data0.items) |a| { + const r = [4]f32{ + (b[3] * a[0]) + (b[0] * a[3]) + (b[1] * a[2]) - (b[2] * a[1]), + (b[3] * a[1]) - (b[0] * a[2]) + (b[1] * a[3]) + (b[2] * a[0]), + (b[3] * a[2]) + (b[0] * a[1]) - (b[1] * a[0]) + (b[2] * a[3]), + (b[3] * a[3]) - (b[0] * a[0]) - (b[1] * a[1]) - (b[2] * a[2]), + }; + std.mem.doNotOptimizeAway(&r); + } + } + } + const end = timer.read(); + const elapsed_s = @intToFloat(f64, end - start) / time.ns_per_s; + + std.debug.print("scalar version: {d:.4}s, ", .{elapsed_s}); + } + + { // Timed section 2: the same sweep through zm.qmul. + i = 0; + var timer = try Timer.start(); + const start = timer.lap(); + while (i < count) : (i += 1) { + for (data1.items) |b| { + for (data0.items) |a| { + const va = zm.loadArr4(a); + const vb = zm.loadArr4(b); + const r = zm.qmul(va, vb); + std.mem.doNotOptimizeAway(&r); + } + } + } + const end = timer.read(); + const elapsed_s = @intToFloat(f64, end - start) / time.ns_per_s; + + std.debug.print("zmath version: {d:.4}s\n", .{elapsed_s}); + } +} + +noinline fn waveBenchmark(allocator:
std.mem.Allocator, comptime count: comptime_int) !void { // Prints timings for evaluating sin(d - t) over a 1024x1024 grid, d being the scaled distance of a cell from the grid centre: four scalar evaluations per inner step vs one zm.F32x16 evaluation per inner step; each timed section sweeps the grid count times. + _ = allocator; // allocator is not used by this benchmark + std.debug.print("{s:>42} - ", .{"wave benchmark (SOA)"}); + + const grid_size = 1024; + { + var t: f32 = 0.0; + + const scale: f32 = 0.05; + + var timer = try Timer.start(); + const start = timer.lap(); + + var iter: usize = 0; + while (iter < count) : (iter += 1) { + var z_index: i32 = 0; + while (z_index < grid_size) : (z_index += 1) { + const z = scale * @intToFloat(f32, z_index - grid_size / 2); + + var x_index: i32 = 0; + while (x_index < grid_size) : (x_index += 4) { + const x0 = scale * @intToFloat(f32, x_index + 0 - grid_size / 2); + const x1 = scale * @intToFloat(f32, x_index + 1 - grid_size / 2); + const x2 = scale * @intToFloat(f32, x_index + 2 - grid_size / 2); + const x3 = scale * @intToFloat(f32, x_index + 3 - grid_size / 2); + + const d0 = zm.sqrt(x0 * x0 + z * z); + const d1 = zm.sqrt(x1 * x1 + z * z); + const d2 = zm.sqrt(x2 * x2 + z * z); + const d3 = zm.sqrt(x3 * x3 + z * z); + + const y0 = zm.sin(d0 - t); + const y1 = zm.sin(d1 - t); + const y2 = zm.sin(d2 - t); + const y3 = zm.sin(d3 - t); + + std.mem.doNotOptimizeAway(&y0); + std.mem.doNotOptimizeAway(&y1); + std.mem.doNotOptimizeAway(&y2); + std.mem.doNotOptimizeAway(&y3); + } + } + t += 0.001; + } + const end = timer.read(); + const elapsed_s = @intToFloat(f64, end - start) / time.ns_per_s; + + std.debug.print("scalar version: {d:.4}s, ", .{elapsed_s}); + } + + { + const T = zm.F32x16; + + const static = struct { + const offsets = [16]f32{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }; + }; + const voffset = zm.load(static.offsets[0..], T, 0); + var vt = zm.splat(T, 0.0); + + const scale: f32 = 0.05; + + var timer = try Timer.start(); + const start = timer.lap(); + + var iter: usize = 0; + while (iter < count) : (iter += 1) { + var z_index: i32 = 0; + while (z_index < grid_size) : (z_index += 1) { + const z = scale * @intToFloat(f32, z_index - grid_size / 2); + const vz = zm.splat(T, z); + + var x_index: i32
= 0; + while (x_index < grid_size) : (x_index += zm.veclen(T)) { + const x = scale * @intToFloat(f32, x_index - grid_size / 2); + const vx = zm.splat(T, x) + voffset * zm.splat(T, scale); // scaled x coordinates of the 16 consecutive cells starting at x_index + + const d = zm.sqrt(vx * vx + vz * vz); + + const vy = zm.sin(d - vt); + + std.mem.doNotOptimizeAway(&vy); + } + } + vt += zm.splat(T, 0.001); + } + const end = timer.read(); + const elapsed_s = @intToFloat(f64, end - start) / time.ns_per_s; + + std.debug.print("zmath version: {d:.4}s\n", .{elapsed_s}); + } +} diff --git a/lib/zmath/src/main.zig b/lib/zmath/src/main.zig new file mode 100644 index 0000000..5834745 --- /dev/null +++ b/lib/zmath/src/main.zig @@ -0,0 +1,18 @@ +//-------------------------------------------------------------------------------------------------- +// +// SIMD math library for game developers +// https://github.com/michal-z/zig-gamedev/tree/main/libs/zmath +// +// See zmath.zig for more details. +// See util.zig for additional functionality. +// +//-------------------------------------------------------------------------------------------------- +pub const version = @import("std").SemanticVersion{ .major = 0, .minor = 9, .patch = 6 }; + +pub usingnamespace @import("zmath.zig"); +pub const util = @import("util.zig"); + +// ensure transitive closure of test coverage +comptime { + _ = util; +} diff --git a/lib/zmath/src/util.zig b/lib/zmath/src/util.zig new file mode 100644 index 0000000..aa79020 --- /dev/null +++ b/lib/zmath/src/util.zig @@ -0,0 +1,182 @@ +// ============================================================================== +// +// Collection of useful functions building on top of, and extending, core zmath. +// https://github.com/michal-z/zig-gamedev/tree/main/libs/zmath +// +// ------------------------------------------------------------------------------ +// 1.
Matrix functions +// ------------------------------------------------------------------------------ +// +// As an example, in a left handed Y-up system: +// getAxisX is equivalent to the right vector +// getAxisY is equivalent to the up vector +// getAxisZ is equivalent to the forward vector +// +// getTranslationVec(m: Mat) Vec +// getAxisX(m: Mat) Vec +// getAxisY(m: Mat) Vec +// getAxisZ(m: Mat) Vec +// +// ============================================================================== + +const zm = @import("zmath.zig"); +const std = @import("std"); +const math = std.math; +const expect = std.testing.expect; + +pub fn getTranslationVec(m: zm.Mat) zm.Vec { // Returns row 3 of m with its w component set to 0. + var translation = m[3]; + translation[3] = 0; + return translation; +} + +pub fn getScaleVec(m: zm.Mat) zm.Vec { // Returns (sx, sy, sz, 0); each component is zm.length3 of the matching column of m, rows 0 to 2 only. + const scale_x = zm.length3(zm.f32x4(m[0][0], m[1][0], m[2][0], 0))[0]; + const scale_y = zm.length3(zm.f32x4(m[0][1], m[1][1], m[2][1], 0))[0]; + const scale_z = zm.length3(zm.f32x4(m[0][2], m[1][2], m[2][2], 0))[0]; + return zm.f32x4(scale_x, scale_y, scale_z, 0); +} + +pub fn getRotationQuat(_m: zm.Mat) zm.Quat { // Normalizes the first three columns of _m (rows 0 to 2) and returns zm.quatFromMat of the result. + // Ortho normalize given matrix.
+ const c1 = zm.normalize3(zm.f32x4(_m[0][0], _m[1][0], _m[2][0], 0)); + const c2 = zm.normalize3(zm.f32x4(_m[0][1], _m[1][1], _m[2][1], 0)); + const c3 = zm.normalize3(zm.f32x4(_m[0][2], _m[1][2], _m[2][2], 0)); // NOTE(review): each column is normalized on its own; nothing here makes the columns mutually orthogonal. + var m = _m; // copy, so row 3 and the w entries keep their original values + m[0][0] = c1[0]; + m[1][0] = c1[1]; + m[2][0] = c1[2]; + m[0][1] = c2[0]; + m[1][1] = c2[1]; + m[2][1] = c2[2]; + m[0][2] = c3[0]; + m[1][2] = c3[1]; + m[2][2] = c3[2]; + + // Extract rotation + return zm.quatFromMat(m); +} + +pub fn getAxisX(m: zm.Mat) zm.Vec { // Returns zm.normalize3 of row 0 of m with w replaced by 0. + return zm.normalize3(zm.f32x4(m[0][0], m[0][1], m[0][2], 0.0)); +} + +pub fn getAxisY(m: zm.Mat) zm.Vec { // Returns zm.normalize3 of row 1 of m with w replaced by 0. + return zm.normalize3(zm.f32x4(m[1][0], m[1][1], m[1][2], 0.0)); +} + +pub fn getAxisZ(m: zm.Mat) zm.Vec { // Returns zm.normalize3 of row 2 of m with w replaced by 0. + return zm.normalize3(zm.f32x4(m[2][0], m[2][1], m[2][2], 0.0)); +} + +test "zmath.util.mat.translation" { + // zig fmt: off + const mat_data = [18]f32{ + 1.0, + 2.0, 3.0, 4.0, 5.0, + 6.0, 7.0, 8.0, 9.0, + 10.0,11.0, 12.0,13.0, + 14.0, 15.0, 16.0, 17.0, + 18.0, + }; + // zig fmt: on + const mat = zm.loadMat(mat_data[1..]); // slice skips the leading 1.0, so the data handed to loadMat starts at 2.0 + const translation = getTranslationVec(mat); + try expect(zm.approxEqAbs(translation, zm.f32x4(14.0, 15.0, 16.0, 0.0), 0.0001)); +} + +test "zmath.util.mat.scale" { + const mat = zm.mul(zm.scaling(3, 4, 5), zm.translation(6, 7, 8)); + const scale = getScaleVec(mat); + try expect(zm.approxEqAbs(scale, zm.f32x4(3.0, 4.0, 5.0, 0.0), 0.0001)); +} + +test "zmath.util.mat.rotation" { + const rotate_origin = zm.matFromRollPitchYaw(0.1, 1.2, 2.3); + const mat = zm.mul(zm.mul(rotate_origin, zm.scaling(3, 4, 5)), zm.translation(6, 7, 8)); + const rotate_get = getRotationQuat(mat); + const v0 = zm.mul(zm.f32x4s(1), rotate_origin); + const v1 = zm.mul(zm.f32x4s(1), zm.quatToMat(rotate_get)); + try expect(zm.approxEqAbs(v0, v1, 0.0001)); +} + +test "zmath.util.mat.z_vec" { + const degToRad = std.math.degreesToRadians; + var identity = zm.identity(); + var z_vec = getAxisZ(identity); + try expect(zm.approxEqAbs(z_vec, zm.f32x4(0.0, 0.0, 1.0, 0), 0.0001)); + const
rot_yaw = zm.rotationY(degToRad(f32, 90)); + identity = zm.mul(identity, rot_yaw); // despite its name, identity now holds the accumulated rotation + z_vec = getAxisZ(identity); + try expect(zm.approxEqAbs(z_vec, zm.f32x4(1.0, 0.0, 0.0, 0), 0.0001)); +} + +test "zmath.util.mat.y_vec" { + const degToRad = std.math.degreesToRadians; + var identity = zm.identity(); + var y_vec = getAxisY(identity); + try expect(zm.approxEqAbs(y_vec, zm.f32x4(0.0, 1.0, 0.0, 0), 0.01)); + const rot_yaw = zm.rotationY(degToRad(f32, 90)); + identity = zm.mul(identity, rot_yaw); + y_vec = getAxisY(identity); + try expect(zm.approxEqAbs(y_vec, zm.f32x4(0.0, 1.0, 0.0, 0), 0.01)); + const rot_pitch = zm.rotationX(degToRad(f32, 90)); + identity = zm.mul(identity, rot_pitch); + y_vec = getAxisY(identity); + try expect(zm.approxEqAbs(y_vec, zm.f32x4(0.0, 0.0, 1.0, 0), 0.01)); +} + +test "zmath.util.mat.right" { + const degToRad = std.math.degreesToRadians; + var identity = zm.identity(); + var right = getAxisX(identity); + try expect(zm.approxEqAbs(right, zm.f32x4(1.0, 0.0, 0.0, 0), 0.01)); + const rot_yaw = zm.rotationY(degToRad(f32, 90)); + identity = zm.mul(identity, rot_yaw); + right = getAxisX(identity); + try expect(zm.approxEqAbs(right, zm.f32x4(0.0, 0.0, -1.0, 0), 0.01)); + const rot_pitch = zm.rotationX(degToRad(f32, 90)); + identity = zm.mul(identity, rot_pitch); + right = getAxisX(identity); + try expect(zm.approxEqAbs(right, zm.f32x4(0.0, 1.0, 0.0, 0), 0.01)); +} + +// ------------------------------------------------------------------------------ +// This software is available under 2 licenses -- choose whichever you prefer.
+// ------------------------------------------------------------------------------ +// ALTERNATIVE A - MIT License +// Copyright (c) 2022 Michal Ziulek and Contributors +// Permission is hereby granted, free of charge, to any person obtaining a copy of +// this software and associated documentation files (the "Software"), to deal in +// the Software without restriction, including without limitation the rights to +// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +// of the Software, and to permit persons to whom the Software is furnished to do +// so, subject to the following conditions: +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. +// ------------------------------------------------------------------------------ +// ALTERNATIVE B - Public Domain (www.unlicense.org) +// This is free and unencumbered software released into the public domain. +// Anyone is free to copy, modify, publish, use, compile, sell, or distribute this +// software, either in source code form or as a compiled binary, for any purpose, +// commercial or non-commercial, and by any means. +// In jurisdictions that recognize copyright laws, the author or authors of this +// software dedicate any and all copyright interest in the software to the public +// domain. We make this dedication for the benefit of the public at large and to +// the detriment of our heirs and successors.
We intend this dedication to be an +// overt act of relinquishment in perpetuity of all present and future rights to +// this software under copyright law. +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +// ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +// ------------------------------------------------------------------------------ diff --git a/lib/zmath/src/zmath.zig b/lib/zmath/src/zmath.zig new file mode 100644 index 0000000..383ed55 --- /dev/null +++ b/lib/zmath/src/zmath.zig @@ -0,0 +1,4442 @@ +// ============================================================================== +// +// SIMD math library for game developers +// https://github.com/michal-z/zig-gamedev/tree/main/libs/zmath +// +// Should work on all OSes supported by Zig. Works on x86_64 and ARM. +// Provides ~140 optimized routines and ~70 extensive tests. +// Can be used with any graphics API. +// +// zmath uses row-major matrices, row vectors (each row vector is stored in a SIMD register). +// Handedness is determined by which function version is used (Rh vs. Lh), +// otherwise the function works with either left-handed or right-handed view coordinates. 
+// +// const va = f32x4(1.0, 2.0, 3.0, 1.0); +// const vb = f32x4(-1.0, 1.0, -1.0, 1.0); +// const v0 = va + vb - f32x4(0.0, 1.0, 0.0, 1.0) * f32x4s(3.0); +// const v1 = cross3(va, vb) + f32x4(1.0, 1.0, 1.0, 1.0); +// const v2 = va + dot3(va, vb) / v1; // dotN() returns scalar replicated on all vector components +// +// const m = rotationX(math.pi * 0.25); +// const v = f32x4(...); +// const v0 = mul(v, m); // 'v' treated as a row vector +// const v1 = mul(m, v); // 'v' treated as a column vector +// const f = m[row][column]; +// +// const b = va < vb; +// if (all(b, 0)) { ... } // '0' means check all vector components; if all are 'true' +// if (all(b, 3)) { ... } // '3' means check first three vector components; if all first three are 'true' +// if (any(b, 0)) { ... } // '0' means check all vector components; if any is 'true' +// if (any(b, 3)) { ... } // '3' means check first three vector components; if any from first three is 'true' +// +// var v4 = load(mem[0..], F32x4, 0); +// var v8 = load(mem[100..], F32x8, 0); +// var v16 = load(mem[200..], F32x16, 0); +// +// var camera_position = [3]f32{ 1.0, 2.0, 3.0 }; +// var cam_pos = loadArr3(camera_position); +// ... +// storeArr3(&camera_position, cam_pos); +// +// v4 = sin(v4); // SIMDx4 +// v8 = cos(v8); // .x86_64 -> 2 x SIMDx4, .x86_64+avx+fma -> SIMDx8 +// v16 = atan(v16); // .x86_64 -> 4 x SIMDx4, .x86_64+avx+fma -> 2 x SIMDx8, .x86_64+avx512f -> SIMDx16 +// +// store(mem[0..], v4, 0); +// store(mem[100..], v8, 0); +// store(mem[200..], v16, 0); +// +// ------------------------------------------------------------------------------ +// 1. 
Initialization functions +// ------------------------------------------------------------------------------ +// +// f32x4(e0: f32, e1: f32, e2: f32, e3: f32) F32x4 +// f32x8(e0: f32, e1: f32, e2: f32, e3: f32, e4: f32, e5: f32, e6: f32, e7: f32) F32x8 +// f32x16(e0: f32, e1: f32, e2: f32, e3: f32, e4: f32, e5: f32, e6: f32, e7: f32, +// e8: f32, e9: f32, ea: f32, eb: f32, ec: f32, ed: f32, ee: f32, ef: f32) F32x16 +// +// f32x4s(e0: f32) F32x4 +// f32x8s(e0: f32) F32x8 +// f32x16s(e0: f32) F32x16 +// +// boolx4(e0: bool, e1: bool, e2: bool, e3: bool) Boolx4 +// boolx8(e0: bool, e1: bool, e2: bool, e3: bool, e4: bool, e5: bool, e6: bool, e7: bool) Boolx8 +// boolx16(e0: bool, e1: bool, e2: bool, e3: bool, e4: bool, e5: bool, e6: bool, e7: bool, +// e8: bool, e9: bool, ea: bool, eb: bool, ec: bool, ed: bool, ee: bool, ef: bool) Boolx16 +// +// load(mem: []const f32, comptime T: type, comptime len: u32) T +// store(mem: []f32, v: anytype, comptime len: u32) void +// +// loadArr2(arr: [2]f32) F32x4 +// loadArr2zw(arr: [2]f32, z: f32, w: f32) F32x4 +// loadArr3(arr: [3]f32) F32x4 +// loadArr3w(arr: [3]f32, w: f32) F32x4 +// loadArr4(arr: [4]f32) F32x4 +// +// storeArr2(arr: *[2]f32, v: F32x4) void +// storeArr3(arr: *[3]f32, v: F32x4) void +// storeArr4(arr: *[4]f32, v: F32x4) void +// +// arr3Ptr(ptr: anytype) *const [3]f32 +// arrNPtr(ptr: anytype) [*]const f32 +// +// splat(comptime T: type, value: f32) T +// splatInt(comptime T: type, value: u32) T +// +// ------------------------------------------------------------------------------ +// 2. 
Functions that work on all vector components (F32xN = F32x4 or F32x8 or F32x16) +// ------------------------------------------------------------------------------ +// +// all(vb: anytype, comptime len: u32) bool +// any(vb: anytype, comptime len: u32) bool +// +// isNearEqual(v0: F32xN, v1: F32xN, epsilon: F32xN) BoolxN +// isNan(v: F32xN) BoolxN +// isInf(v: F32xN) BoolxN +// isInBounds(v: F32xN, bounds: F32xN) BoolxN +// +// andInt(v0: F32xN, v1: F32xN) F32xN +// andNotInt(v0: F32xN, v1: F32xN) F32xN +// orInt(v0: F32xN, v1: F32xN) F32xN +// norInt(v0: F32xN, v1: F32xN) F32xN +// xorInt(v0: F32xN, v1: F32xN) F32xN +// +// minFast(v0: F32xN, v1: F32xN) F32xN +// maxFast(v0: F32xN, v1: F32xN) F32xN +// min(v0: F32xN, v1: F32xN) F32xN +// max(v0: F32xN, v1: F32xN) F32xN +// round(v: F32xN) F32xN +// floor(v: F32xN) F32xN +// trunc(v: F32xN) F32xN +// ceil(v: F32xN) F32xN +// clamp(v0: F32xN, v1: F32xN) F32xN +// clampFast(v0: F32xN, v1: F32xN) F32xN +// saturate(v: F32xN) F32xN +// saturateFast(v: F32xN) F32xN +// lerp(v0: F32xN, v1: F32xN, t: f32) F32xN +// lerpV(v0: F32xN, v1: F32xN, t: F32xN) F32xN +// lerpInverse(v0: F32xN, v1: F32xN, t: f32) F32xN +// lerpInverseV(v0: F32xN, v1: F32xN, t: F32xN) F32xN +// mapLinear(v: F32xN, min1: f32, max1: f32, min2: f32, max2: f32) F32xN +// mapLinearV(v: F32xN, min1: F32xN, max1: F32xN, min2: F32xN, max2: F32xN) F32xN +// sqrt(v: F32xN) F32xN +// abs(v: F32xN) F32xN +// mod(v0: F32xN, v1: F32xN) F32xN +// modAngle(v: F32xN) F32xN +// mulAdd(v0: F32xN, v1: F32xN, v2: F32xN) F32xN +// select(mask: BoolxN, v0: F32xN, v1: F32xN) +// sin(v: F32xN) F32xN +// cos(v: F32xN) F32xN +// sincos(v: F32xN) [2]F32xN +// asin(v: F32xN) F32xN +// acos(v: F32xN) F32xN +// atan(v: F32xN) F32xN +// atan2(vy: F32xN, vx: F32xN) F32xN +// cmulSoa(re0: F32xN, im0: F32xN, re1: F32xN, im1: F32xN) [2]F32xN +// +// ------------------------------------------------------------------------------ +// 3. 
2D, 3D, 4D vector functions +// ------------------------------------------------------------------------------ +// +// swizzle(v: Vec, c, c, c, c) Vec (comptime c = .x | .y | .z | .w) +// dot2(v0: Vec, v1: Vec) F32x4 +// dot3(v0: Vec, v1: Vec) F32x4 +// dot4(v0: Vec, v1: Vec) F32x4 +// cross3(v0: Vec, v1: Vec) Vec +// lengthSq2(v: Vec) F32x4 +// lengthSq3(v: Vec) F32x4 +// lengthSq4(v: Vec) F32x4 +// length2(v: Vec) F32x4 +// length3(v: Vec) F32x4 +// length4(v: Vec) F32x4 +// normalize2(v: Vec) Vec +// normalize3(v: Vec) Vec +// normalize4(v: Vec) Vec +// +// vecToArr2(v: Vec) [2]f32 +// vecToArr3(v: Vec) [3]f32 +// vecToArr4(v: Vec) [4]f32 +// +// ------------------------------------------------------------------------------ +// 4. Matrix functions +// ------------------------------------------------------------------------------ +// +// identity() Mat +// mul(m0: Mat, m1: Mat) Mat +// mul(s: f32, m: Mat) Mat +// mul(m: Mat, s: f32) Mat +// mul(v: Vec, m: Mat) Vec +// mul(m: Mat, v: Vec) Vec +// transpose(m: Mat) Mat +// rotationX(angle: f32) Mat +// rotationY(angle: f32) Mat +// rotationZ(angle: f32) Mat +// translation(x: f32, y: f32, z: f32) Mat +// translationV(v: Vec) Mat +// scaling(x: f32, y: f32, z: f32) Mat +// scalingV(v: Vec) Mat +// lookToLh(eyepos: Vec, eyedir: Vec, updir: Vec) Mat +// lookAtLh(eyepos: Vec, focuspos: Vec, updir: Vec) Mat +// lookToRh(eyepos: Vec, eyedir: Vec, updir: Vec) Mat +// lookAtRh(eyepos: Vec, focuspos: Vec, updir: Vec) Mat +// perspectiveFovLh(fovy: f32, aspect: f32, near: f32, far: f32) Mat +// perspectiveFovRh(fovy: f32, aspect: f32, near: f32, far: f32) Mat +// perspectiveFovLhGl(fovy: f32, aspect: f32, near: f32, far: f32) Mat +// perspectiveFovRhGl(fovy: f32, aspect: f32, near: f32, far: f32) Mat +// orthographicLh(w: f32, h: f32, near: f32, far: f32) Mat +// orthographicRh(w: f32, h: f32, near: f32, far: f32) Mat +// orthographicLhGl(w: f32, h: f32, near: f32, far: f32) Mat +// orthographicRhGl(w: f32, h: f32, near: 
f32, far: f32) Mat +// orthographicOffCenterLh(left: f32, right: f32, top: f32, bottom: f32, near: f32, far: f32) Mat +// orthographicOffCenterRh(left: f32, right: f32, top: f32, bottom: f32, near: f32, far: f32) Mat +// orthographicOffCenterLhGl(left: f32, right: f32, top: f32, bottom: f32, near: f32, far: f32) Mat +// orthographicOffCenterRhGl(left: f32, right: f32, top: f32, bottom: f32, near: f32, far: f32) Mat +// determinant(m: Mat) F32x4 +// inverse(m: Mat) Mat +// inverseDet(m: Mat, det: ?*F32x4) Mat +// matToQuat(m: Mat) Quat +// matFromAxisAngle(axis: Vec, angle: f32) Mat +// matFromNormAxisAngle(axis: Vec, angle: f32) Mat +// matFromQuat(quat: Quat) Mat +// matFromRollPitchYaw(pitch: f32, yaw: f32, roll: f32) Mat +// matFromRollPitchYawV(angles: Vec) Mat +// +// loadMat(mem: []const f32) Mat +// loadMat43(mem: []const f32) Mat +// loadMat34(mem: []const f32) Mat +// storeMat(mem: []f32, m: Mat) void +// storeMat43(mem: []f32, m: Mat) void +// storeMat34(mem: []f32, m: Mat) void +// +// matToArr(m: Mat) [16]f32 +// matToArr43(m: Mat) [12]f32 +// matToArr34(m: Mat) [12]f32 +// +// ------------------------------------------------------------------------------ +// 5. Quaternion functions +// ------------------------------------------------------------------------------ +// +// qmul(q0: Quat, q1: Quat) Quat +// qidentity() Quat +// conjugate(quat: Quat) Quat +// inverse(q: Quat) Quat +// slerp(q0: Quat, q1: Quat, t: f32) Quat +// slerpV(q0: Quat, q1: Quat, t: F32x4) Quat +// quatToMat(quat: Quat) Mat +// quatToAxisAngle(quat: Quat, axis: *Vec, angle: *f32) void +// quatFromMat(m: Mat) Quat +// quatFromAxisAngle(axis: Vec, angle: f32) Quat +// quatFromNormAxisAngle(axis: Vec, angle: f32) Quat +// quatFromRollPitchYaw(pitch: f32, yaw: f32, roll: f32) Quat +// quatFromRollPitchYawV(angles: Vec) Quat +// +// ------------------------------------------------------------------------------ +// 6. 
Color functions +// ------------------------------------------------------------------------------ +// +// adjustSaturation(color: F32x4, saturation: f32) F32x4 +// adjustContrast(color: F32x4, contrast: f32) F32x4 +// rgbToHsl(rgb: F32x4) F32x4 +// hslToRgb(hsl: F32x4) F32x4 +// rgbToHsv(rgb: F32x4) F32x4 +// hsvToRgb(hsv: F32x4) F32x4 +// rgbToSrgb(rgb: F32x4) F32x4 +// srgbToRgb(srgb: F32x4) F32x4 +// +// ------------------------------------------------------------------------------ +// X. Misc functions +// ------------------------------------------------------------------------------ +// +// linePointDistance(linept0: Vec, linept1: Vec, pt: Vec) F32x4 +// sin(v: f32) f32 +// cos(v: f32) f32 +// sincos(v: f32) [2]f32 +// asin(v: f32) f32 +// acos(v: f32) f32 +// +// fftInitUnityTable(unitytable: []F32x4) void +// fft(re: []F32x4, im: []F32x4, unitytable: []const F32x4) void +// ifft(re: []F32x4, im: []const F32x4, unitytable: []const F32x4) void +// +// ============================================================================== + +// Fundamental types +pub const F32x4 = @Vector(4, f32); +pub const F32x8 = @Vector(8, f32); +pub const F32x16 = @Vector(16, f32); +pub const Boolx4 = @Vector(4, bool); +pub const Boolx8 = @Vector(8, bool); +pub const Boolx16 = @Vector(16, bool); + +// "Higher-level" aliases +pub const Vec = F32x4; +pub const Mat = [4]F32x4; +pub const Quat = F32x4; + +const builtin = @import("builtin"); +const std = @import("std"); +const math = std.math; +const assert = std.debug.assert; +const expect = std.testing.expect; + +const cpu_arch = builtin.cpu.arch; +const has_avx = if (cpu_arch == .x86_64) std.Target.x86.featureSetHas(builtin.cpu.features, .avx) else false; +const has_avx512f = if (cpu_arch == .x86_64) std.Target.x86.featureSetHas(builtin.cpu.features, .avx512f) else false; +const has_fma = if (cpu_arch == .x86_64) std.Target.x86.featureSetHas(builtin.cpu.features, .fma) else false; +// 
------------------------------------------------------------------------------ +// +// 1. Initialization functions +// +// ------------------------------------------------------------------------------ +pub inline fn f32x4(e0: f32, e1: f32, e2: f32, e3: f32) F32x4 { // Builds an F32x4 from four scalars; e0 goes to lane 0. + return .{ e0, e1, e2, e3 }; +} +pub inline fn f32x8(e0: f32, e1: f32, e2: f32, e3: f32, e4: f32, e5: f32, e6: f32, e7: f32) F32x8 { // Builds an F32x8 from eight scalars; e0 goes to lane 0. + return .{ e0, e1, e2, e3, e4, e5, e6, e7 }; +} +// zig fmt: off +pub inline fn f32x16( + e0: f32, e1: f32, e2: f32, e3: f32, e4: f32, e5: f32, e6: f32, e7: f32, + e8: f32, e9: f32, ea: f32, eb: f32, ec: f32, ed: f32, ee: f32, ef: f32) F32x16 { // Builds an F32x16 from sixteen scalars; e0 goes to lane 0. + return .{ e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, ea, eb, ec, ed, ee, ef }; +} +// zig fmt: on + +pub inline fn f32x4s(e0: f32) F32x4 { // F32x4 with e0 in every lane. + return splat(F32x4, e0); +} +pub inline fn f32x8s(e0: f32) F32x8 { // F32x8 with e0 in every lane. + return splat(F32x8, e0); +} +pub inline fn f32x16s(e0: f32) F32x16 { // F32x16 with e0 in every lane. + return splat(F32x16, e0); +} + +pub inline fn boolx4(e0: bool, e1: bool, e2: bool, e3: bool) Boolx4 { // Builds a Boolx4 from four flags; e0 goes to lane 0. + return .{ e0, e1, e2, e3 }; +} +pub inline fn boolx8(e0: bool, e1: bool, e2: bool, e3: bool, e4: bool, e5: bool, e6: bool, e7: bool) Boolx8 { // Builds a Boolx8 from eight flags; e0 goes to lane 0. + return .{ e0, e1, e2, e3, e4, e5, e6, e7 }; +} +// zig fmt: off +pub inline fn boolx16( + e0: bool, e1: bool, e2: bool, e3: bool, e4: bool, e5: bool, e6: bool, e7: bool, + e8: bool, e9: bool, ea: bool, eb: bool, ec: bool, ed: bool, ee: bool, ef: bool) Boolx16 { // Builds a Boolx16 from sixteen flags; e0 goes to lane 0. + return .{ e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, ea, eb, ec, ed, ee, ef }; +} +// zig fmt: on + +pub inline fn veclen(comptime T: type) comptime_int { // Number of lanes of vector type T. + return @typeInfo(T).Vector.len; +} + +pub inline fn splat(comptime T: type, value: f32) T { // Vector of type T with value in every lane. + return @splat(veclen(T), value); +} +pub inline fn splatInt(comptime T: type, value: u32) T { // Vector of type T whose every lane holds the bits of value reinterpreted as f32. + return @splat(veclen(T), @bitCast(f32, value)); +} + +pub fn load(mem: []const f32, comptime T: type, comptime len: u32) T { // Copies the first n elements of mem into the first n lanes of a zeroed T; n is len, or the lane count of T when len is 0. + var v = splat(T, 0.0); + comptime var loop_len = if (len == 0) veclen(T) else len; +
comptime var i: u32 = 0; + inline while (i < loop_len) : (i += 1) { + v[i] = mem[i]; + } + return v; +} +test "zmath.load" { + const a = [7]f32{ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0 }; + var ptr = &a; + var i: u32 = 0; + const v0 = load(a[i..], F32x4, 2); + try expect(approxEqAbs(v0, F32x4{ 1.0, 2.0, 0.0, 0.0 }, 0.0)); + i += 2; + const v1 = load(a[i .. i + 2], F32x4, 2); + try expect(approxEqAbs(v1, F32x4{ 3.0, 4.0, 0.0, 0.0 }, 0.0)); + const v2 = load(a[5..7], F32x4, 2); + try expect(approxEqAbs(v2, F32x4{ 6.0, 7.0, 0.0, 0.0 }, 0.0)); + const v3 = load(ptr[1..], F32x4, 2); + try expect(approxEqAbs(v3, F32x4{ 2.0, 3.0, 0.0, 0.0 }, 0.0)); + i += 1; + const v4 = load(ptr[i .. i + 2], F32x4, 2); + try expect(approxEqAbs(v4, F32x4{ 4.0, 5.0, 0.0, 0.0 }, 0.0)); +} + +pub fn store(mem: []f32, v: anytype, comptime len: u32) void { // Writes the first n lanes of v to the first n elements of mem; n is len, or the lane count of v when len is 0. + const T = @TypeOf(v); + comptime var loop_len = if (len == 0) veclen(T) else len; + comptime var i: u32 = 0; + inline while (i < loop_len) : (i += 1) { + mem[i] = v[i]; + } +} +test "zmath.store" { + var a = [7]f32{ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0 }; + const v = load(a[1..], F32x4, 3); // v is (2, 3, 4, 0): three lanes loaded, the fourth left at zero + store(a[2..], v, 4); + try expect(a[0] == 1.0); + try expect(a[1] == 2.0); + try expect(a[2] == 2.0); + try expect(a[3] == 3.0); + try expect(a[4] == 4.0); + try expect(a[5] == 0.0); +} + +pub inline fn loadArr2(arr: [2]f32) F32x4 { // (arr[0], arr[1], 0, 0) + return f32x4(arr[0], arr[1], 0.0, 0.0); +} +pub inline fn loadArr2zw(arr: [2]f32, z: f32, w: f32) F32x4 { // (arr[0], arr[1], z, w) + return f32x4(arr[0], arr[1], z, w); +} +pub inline fn loadArr3(arr: [3]f32) F32x4 { // (arr[0], arr[1], arr[2], 0) + return f32x4(arr[0], arr[1], arr[2], 0.0); +} +pub inline fn loadArr3w(arr: [3]f32, w: f32) F32x4 { // (arr[0], arr[1], arr[2], w) + return f32x4(arr[0], arr[1], arr[2], w); +} +pub inline fn loadArr4(arr: [4]f32) F32x4 { // All four array elements, in order. + return f32x4(arr[0], arr[1], arr[2], arr[3]); +} + +pub inline fn storeArr2(arr: *[2]f32, v: F32x4) void { // Overwrites arr with lanes 0 and 1 of v. + arr.* = .{ v[0], v[1] }; +} +pub inline fn storeArr3(arr: *[3]f32, v: F32x4) void { // Overwrites arr with lanes 0 to 2 of v. + arr.* = .{ v[0], v[1], v[2] }; +} +pub inline fn
storeArr4(arr: *[4]f32, v: F32x4) void { // Overwrites arr with all four lanes of v. + arr.* = .{ v[0], v[1], v[2], v[3] }; +} + +pub inline fn arr3Ptr(ptr: anytype) *const [3]f32 { // Casts a pointer to F32x4 to *const [3]f32; any other argument type fails the comptime asserts. + comptime assert(@typeInfo(@TypeOf(ptr)) == .Pointer); + const T = std.meta.Child(@TypeOf(ptr)); + comptime assert(T == F32x4); + return @ptrCast(*const [3]f32, ptr); +} + +pub inline fn arrNPtr(ptr: anytype) [*]const f32 { // Casts a pointer to Mat, F32x4, F32x8 or F32x16 to a many-item pointer to f32; any other argument type fails the comptime asserts. + comptime assert(@typeInfo(@TypeOf(ptr)) == .Pointer); + const T = std.meta.Child(@TypeOf(ptr)); + comptime assert(T == Mat or T == F32x4 or T == F32x8 or T == F32x16); + return @ptrCast([*]const f32, ptr); +} +test "zmath.arrNPtr" { + { + const mat = identity(); + const f32ptr = arrNPtr(&mat); + try expect(f32ptr[0] == 1.0); + try expect(f32ptr[5] == 1.0); + try expect(f32ptr[10] == 1.0); + try expect(f32ptr[15] == 1.0); + } + { + const v8 = f32x8s(1.0); + const f32ptr = arrNPtr(&v8); + try expect(f32ptr[1] == 1.0); + try expect(f32ptr[7] == 1.0); + } +} + +test "zmath.loadArr" { + { + const camera_position = [3]f32{ 1.0, 2.0, 3.0 }; + const simd_reg = loadArr3(camera_position); + try expect(approxEqAbs(simd_reg, f32x4(1.0, 2.0, 3.0, 0.0), 0.0)); + } + { + const camera_position = [3]f32{ 1.0, 2.0, 3.0 }; + const simd_reg = loadArr3w(camera_position, 1.0); + try expect(approxEqAbs(simd_reg, f32x4(1.0, 2.0, 3.0, 1.0), 0.0)); + } +} + +pub inline fn vecToArr2(v: Vec) [2]f32 { // Lanes 0 and 1 of v as an array. + return .{ v[0], v[1] }; +} +pub inline fn vecToArr3(v: Vec) [3]f32 { // Lanes 0 to 2 of v as an array. + return .{ v[0], v[1], v[2] }; +} +pub inline fn vecToArr4(v: Vec) [4]f32 { // All four lanes of v as an array. + return .{ v[0], v[1], v[2], v[3] }; +} +// ------------------------------------------------------------------------------ +// +// 2.
Functions that work on all vector components (F32xN = F32x4 or F32x8 or F32x16) +// +// ------------------------------------------------------------------------------ +pub fn all(vb: anytype, comptime len: u32) bool { + const T = @TypeOf(vb); + if (len > veclen(T)) { + @compileError("zmath.all(): 'len' is greater than vector len of type " ++ @typeName(T)); + } + comptime var loop_len = if (len == 0) veclen(T) else len; + const ab: [veclen(T)]bool = vb; + comptime var i: u32 = 0; + var result = true; + inline while (i < loop_len) : (i += 1) { + result = result and ab[i]; + } + return result; +} +test "zmath.all" { + try expect(all(boolx8(true, true, true, true, true, false, true, false), 5) == true); + try expect(all(boolx8(true, true, true, true, true, false, true, false), 6) == false); + try expect(all(boolx8(true, true, true, true, false, false, false, false), 4) == true); + try expect(all(boolx4(true, true, true, false), 3) == true); + try expect(all(boolx4(true, true, true, false), 1) == true); + try expect(all(boolx4(true, false, false, false), 1) == true); + try expect(all(boolx4(false, true, false, false), 1) == false); + try expect(all(boolx8(true, true, true, true, true, false, true, false), 0) == false); + try expect(all(boolx4(false, true, false, false), 0) == false); + try expect(all(boolx4(true, true, true, true), 0) == true); +} + +pub fn any(vb: anytype, comptime len: u32) bool { + const T = @TypeOf(vb); + if (len > veclen(T)) { + @compileError("zmath.any(): 'len' is greater than vector len of type " ++ @typeName(T)); + } + comptime var loop_len = if (len == 0) veclen(T) else len; + const ab: [veclen(T)]bool = vb; + comptime var i: u32 = 0; + var result = false; + inline while (i < loop_len) : (i += 1) { + result = result or ab[i]; + } + return result; +} +test "zmath.any" { + try expect(any(boolx8(true, true, true, true, true, false, true, false), 0) == true); + try expect(any(boolx8(false, false, false, true, true, false, true, false), 3) == 
false); + try expect(any(boolx8(false, false, false, false, false, true, false, false), 4) == false); +} + +pub inline fn isNearEqual( + v0: anytype, + v1: anytype, + epsilon: anytype, +) @Vector(veclen(@TypeOf(v0)), bool) { + const T = @TypeOf(v0, v1, epsilon); + const delta = v0 - v1; + const temp = maxFast(delta, splat(T, 0.0) - delta); + return temp <= epsilon; +} +test "zmath.isNearEqual" { + if (builtin.target.os.tag == .macos and builtin.zig_backend != .stage1) return error.SkipZigTest; + { + const v0 = f32x4(1.0, 2.0, -3.0, 4.001); + const v1 = f32x4(1.0, 2.1, 3.0, 4.0); + const b = isNearEqual(v0, v1, splat(F32x4, 0.01)); + try expect(@reduce(.And, b == boolx4(true, false, false, true))); + } + { + const v0 = f32x8(1.0, 2.0, -3.0, 4.001, 1.001, 2.3, -0.0, 0.0); + const v1 = f32x8(1.0, 2.1, 3.0, 4.0, -1.001, 2.1, 0.0, 0.0); + const b = isNearEqual(v0, v1, splat(F32x8, 0.01)); + try expect(@reduce(.And, b == boolx8(true, false, false, true, false, false, true, true))); + } + try expect(all(isNearEqual( + splat(F32x4, math.inf_f32), + splat(F32x4, math.inf_f32), + splat(F32x4, 0.0001), + ), 0) == false); + try expect(all(isNearEqual( + splat(F32x4, -math.inf_f32), + splat(F32x4, math.inf_f32), + splat(F32x4, 0.0001), + ), 0) == false); + try expect(all(isNearEqual( + splat(F32x4, -math.inf_f32), + splat(F32x4, -math.inf_f32), + splat(F32x4, 0.0001), + ), 0) == false); + try expect(all(isNearEqual( + splat(F32x4, -math.nan_f32), + splat(F32x4, math.inf_f32), + splat(F32x4, 0.0001), + ), 0) == false); +} + +pub inline fn isNan( + v: anytype, +) @Vector(veclen(@TypeOf(v)), bool) { + return v != v; +} +test "zmath.isNan" { + { + const v0 = f32x4(math.inf_f32, math.nan_f32, math.nan_f32, 7.0); + const b = isNan(v0); + try expect(@reduce(.And, b == boolx4(false, true, true, false))); + } + { + const v0 = f32x8(0, math.nan_f32, 0, 0, math.inf_f32, math.nan_f32, math.qnan_f32, 7.0); + const b = isNan(v0); + try expect(@reduce(.And, b == boolx8(false, true, false, 
false, false, true, true, false))); + } +} + +pub inline fn isInf( + v: anytype, +) @Vector(veclen(@TypeOf(v)), bool) { + const T = @TypeOf(v); + return abs(v) == splat(T, math.inf_f32); +} +test "zmath.isInf" { + { + const v0 = f32x4(math.inf_f32, math.nan_f32, math.qnan_f32, 7.0); + const b = isInf(v0); + try expect(@reduce(.And, b == boolx4(true, false, false, false))); + } + { + const v0 = f32x8(0, math.inf_f32, 0, 0, math.inf_f32, math.nan_f32, math.qnan_f32, 7.0); + const b = isInf(v0); + try expect(@reduce(.And, b == boolx8(false, true, false, false, true, false, false, false))); + } +} + +pub inline fn isInBounds( + v: anytype, + bounds: anytype, +) @Vector(veclen(@TypeOf(v)), bool) { + const T = @TypeOf(v, bounds); + const Tu = @Vector(veclen(T), u1); + const Tr = @Vector(veclen(T), bool); + + // 2 x cmpleps, xorps, load, andps + const b0 = v <= bounds; + const b1 = (bounds * splat(T, -1.0)) <= v; + const b0u = @bitCast(Tu, b0); + const b1u = @bitCast(Tu, b1); + return @bitCast(Tr, b0u & b1u); +} +test "zmath.isInBounds" { + { + const v0 = f32x4(0.5, -2.0, -1.0, 1.9); + const v1 = f32x4(-1.6, -2.001, -1.0, 1.9); + const bounds = f32x4(1.0, 2.0, 1.0, 2.0); + const b0 = isInBounds(v0, bounds); + const b1 = isInBounds(v1, bounds); + try expect(@reduce(.And, b0 == boolx4(true, true, true, true))); + try expect(@reduce(.And, b1 == boolx4(false, false, true, true))); + } + { + const v0 = f32x8(2.0, 1.0, 2.0, 1.0, 0.5, -2.0, -1.0, 1.9); + const bounds = f32x8(1.0, 1.0, 1.0, math.inf_f32, 1.0, math.nan_f32, 1.0, 2.0); + const b0 = isInBounds(v0, bounds); + try expect(@reduce(.And, b0 == boolx8(false, true, false, true, true, false, true, true))); + } +} + +pub inline fn andInt(v0: anytype, v1: anytype) @TypeOf(v0, v1) { + const T = @TypeOf(v0, v1); + const Tu = @Vector(veclen(T), u32); + const v0u = @bitCast(Tu, v0); + const v1u = @bitCast(Tu, v1); + return @bitCast(T, v0u & v1u); // andps +} +test "zmath.andInt" { + { + const v0 = f32x4(0, @bitCast(f32, ~@as(u32, 
0)), 0, @bitCast(f32, ~@as(u32, 0))); + const v1 = f32x4(1.0, 2.0, 3.0, math.inf_f32); + const v = andInt(v0, v1); + try expect(v[3] == math.inf_f32); + try expect(approxEqAbs(v, f32x4(0.0, 2.0, 0.0, math.inf_f32), 0.0)); + } + { + const v0 = f32x8(0, 0, 0, 0, 0, @bitCast(f32, ~@as(u32, 0)), 0, @bitCast(f32, ~@as(u32, 0))); + const v1 = f32x8(0, 0, 0, 0, 1.0, 2.0, 3.0, math.inf_f32); + const v = andInt(v0, v1); + try expect(v[7] == math.inf_f32); + try expect(approxEqAbs(v, f32x8(0, 0, 0, 0, 0.0, 2.0, 0.0, math.inf_f32), 0.0)); + } +} + +pub inline fn andNotInt(v0: anytype, v1: anytype) @TypeOf(v0, v1) { + const T = @TypeOf(v0, v1); + const Tu = @Vector(veclen(T), u32); + const v0u = @bitCast(Tu, v0); + const v1u = @bitCast(Tu, v1); + return @bitCast(T, ~v0u & v1u); // andnps +} +test "zmath.andNotInt" { + { + const v0 = f32x4(1.0, 2.0, 3.0, 4.0); + const v1 = f32x4(0, @bitCast(f32, ~@as(u32, 0)), 0, @bitCast(f32, ~@as(u32, 0))); + const v = andNotInt(v1, v0); + try expect(approxEqAbs(v, f32x4(1.0, 0.0, 3.0, 0.0), 0.0)); + } + { + const v0 = f32x8(0, 0, 0, 0, 1.0, 2.0, 3.0, 4.0); + const v1 = f32x8(0, 0, 0, 0, 0, @bitCast(f32, ~@as(u32, 0)), 0, @bitCast(f32, ~@as(u32, 0))); + const v = andNotInt(v1, v0); + try expect(approxEqAbs(v, f32x8(0, 0, 0, 0, 1.0, 0.0, 3.0, 0.0), 0.0)); + } +} + +pub inline fn orInt(v0: anytype, v1: anytype) @TypeOf(v0, v1) { + const T = @TypeOf(v0, v1); + const Tu = @Vector(veclen(T), u32); + const v0u = @bitCast(Tu, v0); + const v1u = @bitCast(Tu, v1); + return @bitCast(T, v0u | v1u); // orps +} +test "zmath.orInt" { + { + const v0 = f32x4(0, @bitCast(f32, ~@as(u32, 0)), 0, 0); + const v1 = f32x4(1.0, 2.0, 3.0, 4.0); + const v = orInt(v0, v1); + try expect(v[0] == 1.0); + try expect(@bitCast(u32, v[1]) == ~@as(u32, 0)); + try expect(v[2] == 3.0); + try expect(v[3] == 4.0); + } + { + const v0 = f32x8(0, 0, 0, 0, 0, @bitCast(f32, ~@as(u32, 0)), 0, 0); + const v1 = f32x8(0, 0, 0, 0, 1.0, 2.0, 3.0, 4.0); + const v = orInt(v0, v1); + try 
expect(v[4] == 1.0); + try expect(@bitCast(u32, v[5]) == ~@as(u32, 0)); + try expect(v[6] == 3.0); + try expect(v[7] == 4.0); + } +} + +pub inline fn norInt(v0: anytype, v1: anytype) @TypeOf(v0, v1) { + const T = @TypeOf(v0, v1); + const Tu = @Vector(veclen(T), u32); + const v0u = @bitCast(Tu, v0); + const v1u = @bitCast(Tu, v1); + return @bitCast(T, ~(v0u | v1u)); // por, pcmpeqd, pxor +} + +pub inline fn xorInt(v0: anytype, v1: anytype) @TypeOf(v0, v1) { + const T = @TypeOf(v0, v1); + const Tu = @Vector(veclen(T), u32); + const v0u = @bitCast(Tu, v0); + const v1u = @bitCast(Tu, v1); + return @bitCast(T, v0u ^ v1u); // xorps +} +test "zmath.xorInt" { + { + const v0 = f32x4(1.0, @bitCast(f32, ~@as(u32, 0)), 0, 0); + const v1 = f32x4(1.0, 0, 0, 0); + const v = xorInt(v0, v1); + try expect(v[0] == 0.0); + try expect(@bitCast(u32, v[1]) == ~@as(u32, 0)); + try expect(v[2] == 0.0); + try expect(v[3] == 0.0); + } + { + const v0 = f32x8(0, 0, 0, 0, 1.0, @bitCast(f32, ~@as(u32, 0)), 0, 0); + const v1 = f32x8(0, 0, 0, 0, 1.0, 0, 0, 0); + const v = xorInt(v0, v1); + try expect(v[4] == 0.0); + try expect(@bitCast(u32, v[5]) == ~@as(u32, 0)); + try expect(v[6] == 0.0); + try expect(v[7] == 0.0); + } +} + +pub inline fn minFast(v0: anytype, v1: anytype) @TypeOf(v0, v1) { + return select(v0 < v1, v0, v1); // minps +} +test "zmath.minFast" { + { + const v0 = f32x4(1.0, 3.0, 2.0, 7.0); + const v1 = f32x4(2.0, 1.0, 4.0, math.inf_f32); + const v = minFast(v0, v1); + try expect(approxEqAbs(v, f32x4(1.0, 1.0, 2.0, 7.0), 0.0)); + } + { + const v0 = f32x4(1.0, math.nan_f32, 5.0, math.qnan_f32); + const v1 = f32x4(2.0, 1.0, 4.0, math.inf_f32); + const v = minFast(v0, v1); + try expect(v[0] == 1.0); + try expect(v[1] == 1.0); + try expect(!math.isNan(v[1])); + try expect(v[2] == 4.0); + try expect(v[3] == math.inf_f32); + try expect(!math.isNan(v[3])); + } +} + +pub inline fn maxFast(v0: anytype, v1: anytype) @TypeOf(v0, v1) { + return select(v0 > v1, v0, v1); // maxps +} +test 
"zmath.maxFast" { + { + const v0 = f32x4(1.0, 3.0, 2.0, 7.0); + const v1 = f32x4(2.0, 1.0, 4.0, math.inf_f32); + const v = maxFast(v0, v1); + try expect(approxEqAbs(v, f32x4(2.0, 3.0, 4.0, math.inf_f32), 0.0)); + } + { + const v0 = f32x4(1.0, math.nan_f32, 5.0, math.qnan_f32); + const v1 = f32x4(2.0, 1.0, 4.0, math.inf_f32); + const v = maxFast(v0, v1); + try expect(v[0] == 2.0); + try expect(v[1] == 1.0); + try expect(v[2] == 5.0); + try expect(v[3] == math.inf_f32); + try expect(!math.isNan(v[3])); + } +} + +pub inline fn min(v0: anytype, v1: anytype) @TypeOf(v0, v1) { + // This will handle inf & nan + return @min(v0, v1); // minps, cmpunordps, andps, andnps, orps +} +test "zmath.min" { + if (builtin.target.os.tag == .macos) return error.SkipZigTest; + { + const v0 = f32x4(1.0, 3.0, 2.0, 7.0); + const v1 = f32x4(2.0, 1.0, 4.0, math.inf_f32); + const v = min(v0, v1); + try expect(approxEqAbs(v, f32x4(1.0, 1.0, 2.0, 7.0), 0.0)); + } + { + const v0 = f32x8(0, 0, -2.0, 0, 1.0, 3.0, 2.0, 7.0); + const v1 = f32x8(0, 1.0, 0, 0, 2.0, 1.0, 4.0, math.inf_f32); + const v = min(v0, v1); + try expect(approxEqAbs(v, f32x8(0.0, 0.0, -2.0, 0.0, 1.0, 1.0, 2.0, 7.0), 0.0)); + } + { + const v0 = f32x4(1.0, math.nan_f32, 5.0, math.qnan_f32); + const v1 = f32x4(2.0, 1.0, 4.0, math.inf_f32); + const v = min(v0, v1); + try expect(v[0] == 1.0); + try expect(v[1] == 1.0); + try expect(!math.isNan(v[1])); + try expect(v[2] == 4.0); + try expect(v[3] == math.inf_f32); + try expect(!math.isNan(v[3])); + } + { + const v0 = f32x4(-math.inf_f32, math.inf_f32, math.inf_f32, math.qnan_f32); + const v1 = f32x4(math.qnan_f32, -math.inf_f32, math.qnan_f32, math.nan_f32); + const v = min(v0, v1); + try expect(v[0] == -math.inf_f32); + try expect(v[1] == -math.inf_f32); + try expect(v[2] == math.inf_f32); + try expect(!math.isNan(v[2])); + try expect(math.isNan(v[3])); + try expect(!math.isInf(v[3])); + } +} + +pub inline fn max(v0: anytype, v1: anytype) @TypeOf(v0, v1) { + // This will handle inf & 
nan + return @max(v0, v1); // maxps, cmpunordps, andps, andnps, orps +} +test "zmath.max" { + if (builtin.target.os.tag == .macos) return error.SkipZigTest; + { + const v0 = f32x4(1.0, 3.0, 2.0, 7.0); + const v1 = f32x4(2.0, 1.0, 4.0, math.inf_f32); + const v = max(v0, v1); + try expect(approxEqAbs(v, f32x4(2.0, 3.0, 4.0, math.inf_f32), 0.0)); + } + { + const v0 = f32x8(0, 0, -2.0, 0, 1.0, 3.0, 2.0, 7.0); + const v1 = f32x8(0, 1.0, 0, 0, 2.0, 1.0, 4.0, math.inf_f32); + const v = max(v0, v1); + try expect(approxEqAbs(v, f32x8(0.0, 1.0, 0.0, 0.0, 2.0, 3.0, 4.0, math.inf_f32), 0.0)); + } + { + const v0 = f32x4(1.0, math.nan_f32, 5.0, math.qnan_f32); + const v1 = f32x4(2.0, 1.0, 4.0, math.inf_f32); + const v = max(v0, v1); + try expect(v[0] == 2.0); + try expect(v[1] == 1.0); + try expect(v[2] == 5.0); + try expect(v[3] == math.inf_f32); + try expect(!math.isNan(v[3])); + } + { + const v0 = f32x4(-math.inf_f32, math.inf_f32, math.inf_f32, math.qnan_f32); + const v1 = f32x4(math.qnan_f32, -math.inf_f32, math.qnan_f32, math.nan_f32); + const v = max(v0, v1); + try expect(v[0] == -math.inf_f32); + try expect(v[1] == math.inf_f32); + try expect(v[2] == math.inf_f32); + try expect(!math.isNan(v[2])); + try expect(math.isNan(v[3])); + try expect(!math.isInf(v[3])); + } +} + +pub fn round(v: anytype) @TypeOf(v) { + const T = @TypeOf(v); + if (cpu_arch == .x86_64 and has_avx) { + if (T == F32x4) { + return asm ("vroundps $0, %%xmm0, %%xmm0" + : [ret] "={xmm0}" (-> T), + : [v] "{xmm0}" (v), + ); + } else if (T == F32x8) { + return asm ("vroundps $0, %%ymm0, %%ymm0" + : [ret] "={ymm0}" (-> T), + : [v] "{ymm0}" (v), + ); + } else if (T == F32x16 and has_avx512f) { + return asm ("vrndscaleps $0, %%zmm0, %%zmm0" + : [ret] "={zmm0}" (-> T), + : [v] "{zmm0}" (v), + ); + } else if (T == F32x16 and !has_avx512f) { + const arr: [16]f32 = v; + var ymm0 = @as(F32x8, arr[0..8].*); + var ymm1 = @as(F32x8, arr[8..16].*); + ymm0 = asm ("vroundps $0, %%ymm0, %%ymm0" + : [ret] "={ymm0}" (-> 
F32x8), + : [v] "{ymm0}" (ymm0), + ); + ymm1 = asm ("vroundps $0, %%ymm1, %%ymm1" + : [ret] "={ymm1}" (-> F32x8), + : [v] "{ymm1}" (ymm1), + ); + return @shuffle(f32, ymm0, ymm1, [16]i32{ 0, 1, 2, 3, 4, 5, 6, 7, -1, -2, -3, -4, -5, -6, -7, -8 }); + } + } else { + const sign = andInt(v, splatNegativeZero(T)); + const magic = orInt(splatNoFraction(T), sign); + var r1 = v + magic; + r1 = r1 - magic; + const r2 = abs(v); + const mask = r2 <= splatNoFraction(T); + return select(mask, r1, v); + } +} +test "zmath.round" { + { + try expect(all(round(splat(F32x4, math.inf_f32)) == splat(F32x4, math.inf_f32), 0)); + try expect(all(round(splat(F32x4, -math.inf_f32)) == splat(F32x4, -math.inf_f32), 0)); + try expect(all(isNan(round(splat(F32x4, math.nan_f32))), 0)); + try expect(all(isNan(round(splat(F32x4, -math.nan_f32))), 0)); + try expect(all(isNan(round(splat(F32x4, math.qnan_f32))), 0)); + try expect(all(isNan(round(splat(F32x4, -math.qnan_f32))), 0)); + } + { + var v = round(f32x16(1.1, -1.1, -1.5, 1.5, 2.1, 2.8, 2.9, 4.1, 5.8, 6.1, 7.9, 8.9, 10.1, 11.2, 12.7, 13.1)); + try expect(approxEqAbs( + v, + f32x16(1.0, -1.0, -2.0, 2.0, 2.0, 3.0, 3.0, 4.0, 6.0, 6.0, 8.0, 9.0, 10.0, 11.0, 13.0, 13.0), + 0.0, + )); + } + var v = round(f32x4(1.1, -1.1, -1.5, 1.5)); + try expect(approxEqAbs(v, f32x4(1.0, -1.0, -2.0, 2.0), 0.0)); + + const v1 = f32x4(-10_000_000.1, -math.inf_f32, 10_000_001.5, math.inf_f32); + v = round(v1); + try expect(v[3] == math.inf_f32); + try expect(approxEqAbs(v, f32x4(-10_000_000.1, -math.inf_f32, 10_000_001.5, math.inf_f32), 0.0)); + + const v2 = f32x4(-math.qnan_f32, math.qnan_f32, math.nan_f32, -math.inf_f32); + v = round(v2); // assert on the rounded result `v`, not on the input `v2` + try expect(math.isNan(v[0])); + try expect(math.isNan(v[1])); + try expect(math.isNan(v[2])); + try expect(v[3] == -math.inf_f32); + + const v3 = f32x4(1001.5, -201.499, -10000.99, -101.5); + v = round(v3); + try expect(approxEqAbs(v, f32x4(1002.0, -201.0, -10001.0, -102.0), 0.0)); + + const v4 = f32x4(-1_388_609.9, 
1_388_609.5, 1_388_109.01, 2_388_609.5); + v = round(v4); + try expect(approxEqAbs(v, f32x4(-1_388_610.0, 1_388_610.0, 1_388_109.0, 2_388_610.0), 0.0)); + + var f: f32 = -100.0; + var i: u32 = 0; + while (i < 100) : (i += 1) { + const vr = round(splat(F32x4, f)); + const fr = @round(splat(F32x4, f)); + const vr8 = round(splat(F32x8, f)); + const fr8 = @round(splat(F32x8, f)); + const vr16 = round(splat(F32x16, f)); + const fr16 = @round(splat(F32x16, f)); + try expect(approxEqAbs(vr, fr, 0.0)); + try expect(approxEqAbs(vr8, fr8, 0.0)); + try expect(approxEqAbs(vr16, fr16, 0.0)); + f += 0.12345 * @intToFloat(f32, i); + } +} + +pub fn trunc(v: anytype) @TypeOf(v) { + const T = @TypeOf(v); + if (cpu_arch == .x86_64 and has_avx) { + if (T == F32x4) { + return asm ("vroundps $3, %%xmm0, %%xmm0" + : [ret] "={xmm0}" (-> T), + : [v] "{xmm0}" (v), + ); + } else if (T == F32x8) { + return asm ("vroundps $3, %%ymm0, %%ymm0" + : [ret] "={ymm0}" (-> T), + : [v] "{ymm0}" (v), + ); + } else if (T == F32x16 and has_avx512f) { + return asm ("vrndscaleps $3, %%zmm0, %%zmm0" + : [ret] "={zmm0}" (-> T), + : [v] "{zmm0}" (v), + ); + } else if (T == F32x16 and !has_avx512f) { + const arr: [16]f32 = v; + var ymm0 = @as(F32x8, arr[0..8].*); + var ymm1 = @as(F32x8, arr[8..16].*); + ymm0 = asm ("vroundps $3, %%ymm0, %%ymm0" + : [ret] "={ymm0}" (-> F32x8), + : [v] "{ymm0}" (ymm0), + ); + ymm1 = asm ("vroundps $3, %%ymm1, %%ymm1" + : [ret] "={ymm1}" (-> F32x8), + : [v] "{ymm1}" (ymm1), + ); + return @shuffle(f32, ymm0, ymm1, [16]i32{ 0, 1, 2, 3, 4, 5, 6, 7, -1, -2, -3, -4, -5, -6, -7, -8 }); + } + } else { + const mask = abs(v) < splatNoFraction(T); + const result = floatToIntAndBack(v); + return select(mask, result, v); + } +} +test "zmath.trunc" { + { + try expect(all(trunc(splat(F32x4, math.inf_f32)) == splat(F32x4, math.inf_f32), 0)); + try expect(all(trunc(splat(F32x4, -math.inf_f32)) == splat(F32x4, -math.inf_f32), 0)); + try expect(all(isNan(trunc(splat(F32x4, math.nan_f32))), 0)); + 
try expect(all(isNan(trunc(splat(F32x4, -math.nan_f32))), 0)); + try expect(all(isNan(trunc(splat(F32x4, math.qnan_f32))), 0)); + try expect(all(isNan(trunc(splat(F32x4, -math.qnan_f32))), 0)); + } + { + var v = trunc(f32x16(1.1, -1.1, -1.5, 1.5, 2.1, 2.8, 2.9, 4.1, 5.8, 6.1, 7.9, 8.9, 10.1, 11.2, 12.7, 13.1)); + try expect(approxEqAbs( + v, + f32x16(1.0, -1.0, -1.0, 1.0, 2.0, 2.0, 2.0, 4.0, 5.0, 6.0, 7.0, 8.0, 10.0, 11.0, 12.0, 13.0), + 0.0, + )); + } + var v = trunc(f32x4(1.1, -1.1, -1.5, 1.5)); + try expect(approxEqAbs(v, f32x4(1.0, -1.0, -1.0, 1.0), 0.0)); + + v = trunc(f32x4(-10_000_002.1, -math.inf_f32, 10_000_001.5, math.inf_f32)); + try expect(approxEqAbs(v, f32x4(-10_000_002.1, -math.inf_f32, 10_000_001.5, math.inf_f32), 0.0)); + + v = trunc(f32x4(-math.qnan_f32, math.qnan_f32, math.nan_f32, -math.inf_f32)); + try expect(math.isNan(v[0])); + try expect(math.isNan(v[1])); + try expect(math.isNan(v[2])); + try expect(v[3] == -math.inf_f32); + + v = trunc(f32x4(1000.5001, -201.499, -10000.99, 100.750001)); + try expect(approxEqAbs(v, f32x4(1000.0, -201.0, -10000.0, 100.0), 0.0)); + + v = trunc(f32x4(-7_388_609.5, 7_388_609.1, 8_388_109.5, -8_388_509.5)); + try expect(approxEqAbs(v, f32x4(-7_388_609.0, 7_388_609.0, 8_388_109.0, -8_388_509.0), 0.0)); + + var f: f32 = -100.0; + var i: u32 = 0; + while (i < 100) : (i += 1) { + const vr = trunc(splat(F32x4, f)); + const fr = @trunc(splat(F32x4, f)); + const vr8 = trunc(splat(F32x8, f)); + const fr8 = @trunc(splat(F32x8, f)); + const vr16 = trunc(splat(F32x16, f)); + const fr16 = @trunc(splat(F32x16, f)); + try expect(approxEqAbs(vr, fr, 0.0)); + try expect(approxEqAbs(vr8, fr8, 0.0)); + try expect(approxEqAbs(vr16, fr16, 0.0)); + f += 0.12345 * @intToFloat(f32, i); + } +} + +pub fn floor(v: anytype) @TypeOf(v) { + const T = @TypeOf(v); + if (cpu_arch == .x86_64 and has_avx) { + if (T == F32x4) { + return asm ("vroundps $1, %%xmm0, %%xmm0" + : [ret] "={xmm0}" (-> T), + : [v] "{xmm0}" (v), + ); + } else if (T == 
F32x8) { + return asm ("vroundps $1, %%ymm0, %%ymm0" + : [ret] "={ymm0}" (-> T), + : [v] "{ymm0}" (v), + ); + } else if (T == F32x16 and has_avx512f) { + return asm ("vrndscaleps $1, %%zmm0, %%zmm0" + : [ret] "={zmm0}" (-> T), + : [v] "{zmm0}" (v), + ); + } else if (T == F32x16 and !has_avx512f) { + const arr: [16]f32 = v; + var ymm0 = @as(F32x8, arr[0..8].*); + var ymm1 = @as(F32x8, arr[8..16].*); + ymm0 = asm ("vroundps $1, %%ymm0, %%ymm0" + : [ret] "={ymm0}" (-> F32x8), + : [v] "{ymm0}" (ymm0), + ); + ymm1 = asm ("vroundps $1, %%ymm1, %%ymm1" + : [ret] "={ymm1}" (-> F32x8), + : [v] "{ymm1}" (ymm1), + ); + return @shuffle(f32, ymm0, ymm1, [16]i32{ 0, 1, 2, 3, 4, 5, 6, 7, -1, -2, -3, -4, -5, -6, -7, -8 }); + } + } else { + const mask = abs(v) < splatNoFraction(T); + var result = floatToIntAndBack(v); + const larger_mask = result > v; + const larger = select(larger_mask, splat(T, -1.0), splat(T, 0.0)); + result = result + larger; + return select(mask, result, v); + } +} +test "zmath.floor" { + { + try expect(all(floor(splat(F32x4, math.inf_f32)) == splat(F32x4, math.inf_f32), 0)); + try expect(all(floor(splat(F32x4, -math.inf_f32)) == splat(F32x4, -math.inf_f32), 0)); + try expect(all(isNan(floor(splat(F32x4, math.nan_f32))), 0)); + try expect(all(isNan(floor(splat(F32x4, -math.nan_f32))), 0)); + try expect(all(isNan(floor(splat(F32x4, math.qnan_f32))), 0)); + try expect(all(isNan(floor(splat(F32x4, -math.qnan_f32))), 0)); + } + { + var v = floor(f32x16(1.1, -1.1, -1.5, 1.5, 2.1, 2.8, 2.9, 4.1, 5.8, 6.1, 7.9, 8.9, 10.1, 11.2, 12.7, 13.1)); + try expect(approxEqAbs( + v, + f32x16(1.0, -2.0, -2.0, 1.0, 2.0, 2.0, 2.0, 4.0, 5.0, 6.0, 7.0, 8.0, 10.0, 11.0, 12.0, 13.0), + 0.0, + )); + } + var v = floor(f32x4(1.5, -1.5, -1.7, -2.1)); + try expect(approxEqAbs(v, f32x4(1.0, -2.0, -2.0, -3.0), 0.0)); + + v = floor(f32x4(-10_000_002.1, -math.inf_f32, 10_000_001.5, math.inf_f32)); + try expect(approxEqAbs(v, f32x4(-10_000_002.1, -math.inf_f32, 10_000_001.5, math.inf_f32), 
0.0)); + + v = floor(f32x4(-math.qnan_f32, math.qnan_f32, math.nan_f32, -math.inf_f32)); + try expect(math.isNan(v[0])); + try expect(math.isNan(v[1])); + try expect(math.isNan(v[2])); + try expect(v[3] == -math.inf_f32); + + v = floor(f32x4(1000.5001, -201.499, -10000.99, 100.75001)); + try expect(approxEqAbs(v, f32x4(1000.0, -202.0, -10001.0, 100.0), 0.0)); + + v = floor(f32x4(-7_388_609.5, 7_388_609.1, 8_388_109.5, -8_388_509.5)); + try expect(approxEqAbs(v, f32x4(-7_388_610.0, 7_388_609.0, 8_388_109.0, -8_388_510.0), 0.0)); + + var f: f32 = -100.0; + var i: u32 = 0; + while (i < 100) : (i += 1) { + const vr = floor(splat(F32x4, f)); + const fr = @floor(splat(F32x4, f)); + const vr8 = floor(splat(F32x8, f)); + const fr8 = @floor(splat(F32x8, f)); + const vr16 = floor(splat(F32x16, f)); + const fr16 = @floor(splat(F32x16, f)); + try expect(approxEqAbs(vr, fr, 0.0)); + try expect(approxEqAbs(vr8, fr8, 0.0)); + try expect(approxEqAbs(vr16, fr16, 0.0)); + f += 0.12345 * @intToFloat(f32, i); + } +} + +pub fn ceil(v: anytype) @TypeOf(v) { + const T = @TypeOf(v); + if (cpu_arch == .x86_64 and has_avx) { + if (T == F32x4) { + return asm ("vroundps $2, %%xmm0, %%xmm0" + : [ret] "={xmm0}" (-> T), + : [v] "{xmm0}" (v), + ); + } else if (T == F32x8) { + return asm ("vroundps $2, %%ymm0, %%ymm0" + : [ret] "={ymm0}" (-> T), + : [v] "{ymm0}" (v), + ); + } else if (T == F32x16 and has_avx512f) { + return asm ("vrndscaleps $2, %%zmm0, %%zmm0" + : [ret] "={zmm0}" (-> T), + : [v] "{zmm0}" (v), + ); + } else if (T == F32x16 and !has_avx512f) { + const arr: [16]f32 = v; + var ymm0 = @as(F32x8, arr[0..8].*); + var ymm1 = @as(F32x8, arr[8..16].*); + ymm0 = asm ("vroundps $2, %%ymm0, %%ymm0" + : [ret] "={ymm0}" (-> F32x8), + : [v] "{ymm0}" (ymm0), + ); + ymm1 = asm ("vroundps $2, %%ymm1, %%ymm1" + : [ret] "={ymm1}" (-> F32x8), + : [v] "{ymm1}" (ymm1), + ); + return @shuffle(f32, ymm0, ymm1, [16]i32{ 0, 1, 2, 3, 4, 5, 6, 7, -1, -2, -3, -4, -5, -6, -7, -8 }); + } + } else { + const mask 
= abs(v) < splatNoFraction(T); + var result = floatToIntAndBack(v); + const smaller_mask = result < v; + const smaller = select(smaller_mask, splat(T, -1.0), splat(T, 0.0)); + result = result - smaller; + return select(mask, result, v); + } +} +test "zmath.ceil" { + { + try expect(all(ceil(splat(F32x4, math.inf_f32)) == splat(F32x4, math.inf_f32), 0)); + try expect(all(ceil(splat(F32x4, -math.inf_f32)) == splat(F32x4, -math.inf_f32), 0)); + try expect(all(isNan(ceil(splat(F32x4, math.nan_f32))), 0)); + try expect(all(isNan(ceil(splat(F32x4, -math.nan_f32))), 0)); + try expect(all(isNan(ceil(splat(F32x4, math.qnan_f32))), 0)); + try expect(all(isNan(ceil(splat(F32x4, -math.qnan_f32))), 0)); + } + { + var v = ceil(f32x16(1.1, -1.1, -1.5, 1.5, 2.1, 2.8, 2.9, 4.1, 5.8, 6.1, 7.9, 8.9, 10.1, 11.2, 12.7, 13.1)); + try expect(approxEqAbs( + v, + f32x16(2.0, -1.0, -1.0, 2.0, 3.0, 3.0, 3.0, 5.0, 6.0, 7.0, 8.0, 9.0, 11.0, 12.0, 13.0, 14.0), + 0.0, + )); + } + var v = ceil(f32x4(1.5, -1.5, -1.7, -2.1)); + try expect(approxEqAbs(v, f32x4(2.0, -1.0, -1.0, -2.0), 0.0)); + + v = ceil(f32x4(-10_000_002.1, -math.inf_f32, 10_000_001.5, math.inf_f32)); + try expect(approxEqAbs(v, f32x4(-10_000_002.1, -math.inf_f32, 10_000_001.5, math.inf_f32), 0.0)); + + v = ceil(f32x4(-math.qnan_f32, math.qnan_f32, math.nan_f32, -math.inf_f32)); + try expect(math.isNan(v[0])); + try expect(math.isNan(v[1])); + try expect(math.isNan(v[2])); + try expect(v[3] == -math.inf_f32); + + v = ceil(f32x4(1000.5001, -201.499, -10000.99, 100.75001)); + try expect(approxEqAbs(v, f32x4(1001.0, -201.0, -10000.0, 101.0), 0.0)); + + v = ceil(f32x4(-1_388_609.5, 1_388_609.1, 1_388_109.9, -1_388_509.9)); + try expect(approxEqAbs(v, f32x4(-1_388_609.0, 1_388_610.0, 1_388_110.0, -1_388_509.0), 0.0)); + + var f: f32 = -100.0; + var i: u32 = 0; + while (i < 100) : (i += 1) { + const vr = ceil(splat(F32x4, f)); + const fr = @ceil(splat(F32x4, f)); + const vr8 = ceil(splat(F32x8, f)); + const fr8 = @ceil(splat(F32x8, f)); + 
const vr16 = ceil(splat(F32x16, f)); + const fr16 = @ceil(splat(F32x16, f)); + try expect(approxEqAbs(vr, fr, 0.0)); + try expect(approxEqAbs(vr8, fr8, 0.0)); + try expect(approxEqAbs(vr16, fr16, 0.0)); + f += 0.12345 * @intToFloat(f32, i); + } +} + +pub inline fn clamp(v: anytype, vmin: anytype, vmax: anytype) @TypeOf(v, vmin, vmax) { + var result = max(vmin, v); + result = min(vmax, result); + return result; +} +test "zmath.clamp" { + if (builtin.target.os.tag == .macos) return error.SkipZigTest; + { + const v0 = f32x4(-1.0, 0.2, 1.1, -0.3); + const v = clamp(v0, splat(F32x4, -0.5), splat(F32x4, 0.5)); + try expect(approxEqAbs(v, f32x4(-0.5, 0.2, 0.5, -0.3), 0.0001)); + } + { + const v0 = f32x8(-2.0, 0.25, -0.25, 100.0, -1.0, 0.2, 1.1, -0.3); + const v = clamp(v0, splat(F32x8, -0.5), splat(F32x8, 0.5)); + try expect(approxEqAbs(v, f32x8(-0.5, 0.25, -0.25, 0.5, -0.5, 0.2, 0.5, -0.3), 0.0001)); + } + { + const v0 = f32x4(-math.inf_f32, math.inf_f32, math.nan_f32, math.qnan_f32); + const v = clamp(v0, f32x4(-100.0, 0.0, -100.0, 0.0), f32x4(0.0, 100.0, 0.0, 100.0)); + try expect(approxEqAbs(v, f32x4(-100.0, 100.0, -100.0, 0.0), 0.0001)); + } + { + const v0 = f32x4(math.inf_f32, math.inf_f32, -math.nan_f32, -math.qnan_f32); + const v = clamp(v0, splat(F32x4, -1.0), splat(F32x4, 1.0)); + try expect(approxEqAbs(v, f32x4(1.0, 1.0, -1.0, -1.0), 0.0001)); + } +} + +pub inline fn clampFast(v: anytype, vmin: anytype, vmax: anytype) @TypeOf(v, vmin, vmax) { + var result = maxFast(vmin, v); + result = minFast(vmax, result); + return result; +} +test "zmath.clampFast" { + { + const v0 = f32x4(-1.0, 0.2, 1.1, -0.3); + const v = clampFast(v0, splat(F32x4, -0.5), splat(F32x4, 0.5)); + try expect(approxEqAbs(v, f32x4(-0.5, 0.2, 0.5, -0.3), 0.0001)); + } +} + +pub inline fn saturate(v: anytype) @TypeOf(v) { + const T = @TypeOf(v); + var result = max(v, splat(T, 0.0)); + result = min(result, splat(T, 1.0)); + return result; +} +test "zmath.saturate" { + if (builtin.target.os.tag == 
.macos) return error.SkipZigTest; + { + const v0 = f32x4(-1.0, 0.2, 1.1, -0.3); + const v = saturate(v0); + try expect(approxEqAbs(v, f32x4(0.0, 0.2, 1.0, 0.0), 0.0001)); + } + { + const v0 = f32x8(0.0, 0.0, 2.0, -2.0, -1.0, 0.2, 1.1, -0.3); + const v = saturate(v0); + try expect(approxEqAbs(v, f32x8(0.0, 0.0, 1.0, 0.0, 0.0, 0.2, 1.0, 0.0), 0.0001)); + } + { + const v0 = f32x4(-math.inf_f32, math.inf_f32, math.nan_f32, math.qnan_f32); + const v = saturate(v0); + try expect(approxEqAbs(v, f32x4(0.0, 1.0, 0.0, 0.0), 0.0001)); + } + { + const v0 = f32x4(math.inf_f32, math.inf_f32, -math.nan_f32, -math.qnan_f32); + const v = saturate(v0); + try expect(approxEqAbs(v, f32x4(1.0, 1.0, 0.0, 0.0), 0.0001)); + } +} + +pub inline fn saturateFast(v: anytype) @TypeOf(v) { + const T = @TypeOf(v); + var result = maxFast(v, splat(T, 0.0)); + result = minFast(result, splat(T, 1.0)); + return result; +} +test "zmath.saturateFast" { + { + const v0 = f32x4(-1.0, 0.2, 1.1, -0.3); + const v = saturateFast(v0); + try expect(approxEqAbs(v, f32x4(0.0, 0.2, 1.0, 0.0), 0.0001)); + } + { + const v0 = f32x8(0.0, 0.0, 2.0, -2.0, -1.0, 0.2, 1.1, -0.3); + const v = saturateFast(v0); + try expect(approxEqAbs(v, f32x8(0.0, 0.0, 1.0, 0.0, 0.0, 0.2, 1.0, 0.0), 0.0001)); + } + { + const v0 = f32x4(-math.inf_f32, math.inf_f32, math.nan_f32, math.qnan_f32); + const v = saturateFast(v0); + try expect(approxEqAbs(v, f32x4(0.0, 1.0, 0.0, 0.0), 0.0001)); + } + { + const v0 = f32x4(math.inf_f32, math.inf_f32, -math.nan_f32, -math.qnan_f32); + const v = saturateFast(v0); + try expect(approxEqAbs(v, f32x4(1.0, 1.0, 0.0, 0.0), 0.0001)); + } +} + +pub inline fn sqrt(v: anytype) @TypeOf(v) { + return @sqrt(v); // sqrtps +} + +pub inline fn abs(v: anytype) @TypeOf(v) { + return @fabs(v); // load, andps +} + +pub inline fn select(mask: anytype, v0: anytype, v1: anytype) @TypeOf(v0, v1) { + return @select(f32, mask, v0, v1); +} + +pub inline fn lerp(v0: anytype, v1: anytype, t: f32) @TypeOf(v0, v1) { + const T = 
@TypeOf(v0, v1); + return v0 + (v1 - v0) * splat(T, t); // subps, shufps, addps, mulps +} + +pub inline fn lerpV(v0: anytype, v1: anytype, t: anytype) @TypeOf(v0, v1, t) { + return v0 + (v1 - v0) * t; // subps, addps, mulps +} + +pub inline fn lerpInverse(v0: anytype, v1: anytype, t: anytype) @TypeOf(v0, v1) { + const T = @TypeOf(v0, v1); + return (splat(T, t) - v0) / (v1 - v0); +} + +pub inline fn lerpInverseV(v0: anytype, v1: anytype, t: anytype) @TypeOf(v0, v1, t) { + return (t - v0) / (v1 - v0); +} +test "zmath.lerpInverse" { + try expect(math.approxEqAbs(f32, lerpInverseV(10.0, 100.0, 10.0), 0, 0.0005)); + try expect(math.approxEqAbs(f32, lerpInverseV(10.0, 100.0, 100.0), 1, 0.0005)); + try expect(math.approxEqAbs(f32, lerpInverseV(10.0, 100.0, 55.0), 0.5, 0.05)); + try expect(approxEqAbs(lerpInverse(f32x4(0, 0, 10, 10), f32x4(100, 200, 100, 100), 10.0), f32x4(0.1, 0.05, 0, 0), 0.0005)); +} + +/// To transform a vector of values from one range to another. +pub inline fn mapLinear(v: anytype, min1: anytype, max1: anytype, min2: anytype, max2: anytype) @TypeOf(v) { + const T = @TypeOf(v); + const min1V = splat(T, min1); + const max1V = splat(T, max1); + const min2V = splat(T, min2); + const max2V = splat(T, max2); + const dV = max1V - min1V; + return min2V + (v - min1V) * (max2V - min2V) / dV; +} + +pub inline fn mapLinearV(v: anytype, min1: anytype, max1: anytype, min2: anytype, max2: anytype) @TypeOf(v, min1, max1, min2, max2) { + const d = max1 - min1; + return min2 + (v - min1) * (max2 - min2) / d; +} +test "zmath.mapLinear" { + try expect(math.approxEqAbs(f32, mapLinearV(0, 0, 1.2, 10, 100), 10, 0.0005)); + try expect(math.approxEqAbs(f32, mapLinearV(1.2, 0, 1.2, 10, 100), 100, 0.0005)); + try expect(math.approxEqAbs(f32, mapLinearV(0.6, 0, 1.2, 10, 100), 55, 0.0005)); + try expect(approxEqAbs(mapLinearV(splat(F32x4, 0), splat(F32x4, 0), splat(F32x4, 1.2), splat(F32x4, 10), splat(F32x4, 100)), splat(F32x4, 10), 0.0005)); + try 
expect(approxEqAbs(mapLinear(f32x4(0, 0, 0.6, 1.2), 0, 1.2, 10, 100), f32x4(10, 10, 55, 100), 0.0005)); +} + +pub const F32x4Component = enum { x, y, z, w }; + +pub inline fn swizzle( + v: F32x4, + comptime x: F32x4Component, + comptime y: F32x4Component, + comptime z: F32x4Component, + comptime w: F32x4Component, +) F32x4 { + return @shuffle(f32, v, undefined, [4]i32{ @enumToInt(x), @enumToInt(y), @enumToInt(z), @enumToInt(w) }); +} + +pub inline fn mod(v0: anytype, v1: anytype) @TypeOf(v0, v1) { + // vdivps, vroundps, vmulps, vsubps + return v0 - v1 * trunc(v0 / v1); +} +test "zmath.mod" { + if (builtin.target.os.tag == .macos and builtin.zig_backend != .stage1) return error.SkipZigTest; + try expect(approxEqAbs(mod(splat(F32x4, 3.1), splat(F32x4, 1.7)), splat(F32x4, 1.4), 0.0005)); + try expect(approxEqAbs(mod(splat(F32x4, -3.0), splat(F32x4, 2.0)), splat(F32x4, -1.0), 0.0005)); + try expect(approxEqAbs(mod(splat(F32x4, -3.0), splat(F32x4, -2.0)), splat(F32x4, -1.0), 0.0005)); + try expect(approxEqAbs(mod(splat(F32x4, 3.0), splat(F32x4, -2.0)), splat(F32x4, 1.0), 0.0005)); + try expect(all(isNan(mod(splat(F32x4, math.inf_f32), splat(F32x4, 1.0))), 0)); + try expect(all(isNan(mod(splat(F32x4, -math.inf_f32), splat(F32x4, 123.456))), 0)); + try expect(all(isNan(mod(splat(F32x4, math.nan_f32), splat(F32x4, 123.456))), 0)); + try expect(all(isNan(mod(splat(F32x4, math.qnan_f32), splat(F32x4, 123.456))), 0)); + try expect(all(isNan(mod(splat(F32x4, -math.qnan_f32), splat(F32x4, 123.456))), 0)); + try expect(all(isNan(mod(splat(F32x4, 123.456), splat(F32x4, math.inf_f32))), 0)); + try expect(all(isNan(mod(splat(F32x4, 123.456), splat(F32x4, -math.inf_f32))), 0)); + try expect(all(isNan(mod(splat(F32x4, math.inf_f32), splat(F32x4, math.inf_f32))), 0)); + try expect(all(isNan(mod(splat(F32x4, 123.456), splat(F32x4, math.nan_f32))), 0)); + try expect(all(isNan(mod(splat(F32x4, math.inf_f32), splat(F32x4, math.nan_f32))), 0)); +} + +pub fn modAngle(v: anytype) @TypeOf(v) 
{ + const T = @TypeOf(v); + return switch (T) { + f32 => modAngle32(v), + F32x4, F32x8, F32x16 => modAngle32xN(v), + else => @compileError("zmath.modAngle() not implemented for " ++ @typeName(T)), + }; +} + +pub inline fn modAngle32xN(v: anytype) @TypeOf(v) { + const T = @TypeOf(v); + return v - splat(T, math.tau) * round(v * splat(T, 1.0 / math.tau)); // 2 x vmulps, 2 x load, vroundps, vaddps +} +test "zmath.modAngle" { + try expect(approxEqAbs(modAngle(splat(F32x4, math.tau)), splat(F32x4, 0.0), 0.0005)); + try expect(approxEqAbs(modAngle(splat(F32x4, 0.0)), splat(F32x4, 0.0), 0.0005)); + try expect(approxEqAbs(modAngle(splat(F32x4, math.pi)), splat(F32x4, math.pi), 0.0005)); + try expect(approxEqAbs(modAngle(splat(F32x4, 11 * math.pi)), splat(F32x4, math.pi), 0.0005)); + try expect(approxEqAbs(modAngle(splat(F32x4, 3.5 * math.pi)), splat(F32x4, -0.5 * math.pi), 0.0005)); + try expect(approxEqAbs(modAngle(splat(F32x4, 2.5 * math.pi)), splat(F32x4, 0.5 * math.pi), 0.0005)); +} + +pub inline fn mulAdd(v0: anytype, v1: anytype, v2: anytype) @TypeOf(v0, v1, v2) { + const T = @TypeOf(v0, v1, v2); + if (@import("zmath_options").enable_cross_platform_determinism) { + return v0 * v1 + v2; // Compiler will generate mul, add sequence (no fma even if the target supports it). + } else { + if (cpu_arch == .x86_64 and has_avx and has_fma) { + return @mulAdd(T, v0, v1, v2); + } else { + // NOTE(mziulek): On .x86_64 without HW fma instructions @mulAdd maps to really slow code! 
+ return v0 * v1 + v2; + } + } +} + +fn sin32xN(v: anytype) @TypeOf(v) { + // 11-degree minimax approximation + const T = @TypeOf(v); + + var x = modAngle(v); + const sign = andInt(x, splatNegativeZero(T)); + const c = orInt(sign, splat(T, math.pi)); + const absx = andNotInt(sign, x); + const rflx = c - x; + const comp = absx <= splat(T, 0.5 * math.pi); + x = select(comp, x, rflx); + const x2 = x * x; + + var result = mulAdd(splat(T, -2.3889859e-08), x2, splat(T, 2.7525562e-06)); + result = mulAdd(result, x2, splat(T, -0.00019840874)); + result = mulAdd(result, x2, splat(T, 0.0083333310)); + result = mulAdd(result, x2, splat(T, -0.16666667)); + result = mulAdd(result, x2, splat(T, 1.0)); + return x * result; +} +test "zmath.sin" { + const epsilon = 0.0001; + + try expect(approxEqAbs(sin(splat(F32x4, 0.5 * math.pi)), splat(F32x4, 1.0), epsilon)); + try expect(approxEqAbs(sin(splat(F32x4, 0.0)), splat(F32x4, 0.0), epsilon)); + try expect(approxEqAbs(sin(splat(F32x4, -0.0)), splat(F32x4, -0.0), epsilon)); + try expect(approxEqAbs(sin(splat(F32x4, 89.123)), splat(F32x4, 0.916166), epsilon)); + try expect(approxEqAbs(sin(splat(F32x8, 89.123)), splat(F32x8, 0.916166), epsilon)); + try expect(approxEqAbs(sin(splat(F32x16, 89.123)), splat(F32x16, 0.916166), epsilon)); + try expect(all(isNan(sin(splat(F32x4, math.inf_f32))), 0) == true); + try expect(all(isNan(sin(splat(F32x4, -math.inf_f32))), 0) == true); + try expect(all(isNan(sin(splat(F32x4, math.nan_f32))), 0) == true); + try expect(all(isNan(sin(splat(F32x4, math.qnan_f32))), 0) == true); + + var f: f32 = -100.0; + var i: u32 = 0; + while (i < 100) : (i += 1) { + const vr = sin(splat(F32x4, f)); + const fr = @sin(splat(F32x4, f)); + const vr8 = sin(splat(F32x8, f)); + const fr8 = @sin(splat(F32x8, f)); + const vr16 = sin(splat(F32x16, f)); + const fr16 = @sin(splat(F32x16, f)); + try expect(approxEqAbs(vr, fr, epsilon)); + try expect(approxEqAbs(vr8, fr8, epsilon)); + try expect(approxEqAbs(vr16, fr16, epsilon)); + f 
+= 0.12345 * @intToFloat(f32, i); + } +} + +fn cos32xN(v: anytype) @TypeOf(v) { + // 10-degree minimax approximation + const T = @TypeOf(v); + + var x = modAngle(v); + var sign = andInt(x, splatNegativeZero(T)); + const c = orInt(sign, splat(T, math.pi)); + const absx = andNotInt(sign, x); + const rflx = c - x; + const comp = absx <= splat(T, 0.5 * math.pi); + x = select(comp, x, rflx); + sign = select(comp, splat(T, 1.0), splat(T, -1.0)); + const x2 = x * x; + + var result = mulAdd(splat(T, -2.6051615e-07), x2, splat(T, 2.4760495e-05)); + result = mulAdd(result, x2, splat(T, -0.0013888378)); + result = mulAdd(result, x2, splat(T, 0.041666638)); + result = mulAdd(result, x2, splat(T, -0.5)); + result = mulAdd(result, x2, splat(T, 1.0)); + return sign * result; +} +test "zmath.cos" { + const epsilon = 0.0001; + + try expect(approxEqAbs(cos(splat(F32x4, 0.5 * math.pi)), splat(F32x4, 0.0), epsilon)); + try expect(approxEqAbs(cos(splat(F32x4, 0.0)), splat(F32x4, 1.0), epsilon)); + try expect(approxEqAbs(cos(splat(F32x4, -0.0)), splat(F32x4, 1.0), epsilon)); + try expect(all(isNan(cos(splat(F32x4, math.inf_f32))), 0) == true); + try expect(all(isNan(cos(splat(F32x4, -math.inf_f32))), 0) == true); + try expect(all(isNan(cos(splat(F32x4, math.nan_f32))), 0) == true); + try expect(all(isNan(cos(splat(F32x4, math.qnan_f32))), 0) == true); + + var f: f32 = -100.0; + var i: u32 = 0; + while (i < 100) : (i += 1) { + const vr = cos(splat(F32x4, f)); + const fr = @cos(splat(F32x4, f)); + const vr8 = cos(splat(F32x8, f)); + const fr8 = @cos(splat(F32x8, f)); + const vr16 = cos(splat(F32x16, f)); + const fr16 = @cos(splat(F32x16, f)); + try expect(approxEqAbs(vr, fr, epsilon)); + try expect(approxEqAbs(vr8, fr8, epsilon)); + try expect(approxEqAbs(vr16, fr16, epsilon)); + f += 0.12345 * @intToFloat(f32, i); + } +} + +pub fn sin(v: anytype) @TypeOf(v) { + const T = @TypeOf(v); + return switch (T) { + f32 => sin32(v), + F32x4, F32x8, F32x16 => sin32xN(v), + else => 
@compileError("zmath.sin() not implemented for " ++ @typeName(T)), + }; +} + +pub fn cos(v: anytype) @TypeOf(v) { + const T = @TypeOf(v); + return switch (T) { + f32 => cos32(v), + F32x4, F32x8, F32x16 => cos32xN(v), + else => @compileError("zmath.cos() not implemented for " ++ @typeName(T)), + }; +} + +pub fn sincos(v: anytype) [2]@TypeOf(v) { + const T = @TypeOf(v); + return switch (T) { + f32 => sincos32(v), + F32x4, F32x8, F32x16 => sincos32xN(v), + else => @compileError("zmath.sincos() not implemented for " ++ @typeName(T)), + }; +} + +pub fn asin(v: anytype) @TypeOf(v) { + const T = @TypeOf(v); + return switch (T) { + f32 => asin32(v), + F32x4, F32x8, F32x16 => asin32xN(v), + else => @compileError("zmath.asin() not implemented for " ++ @typeName(T)), + }; +} + +pub fn acos(v: anytype) @TypeOf(v) { + const T = @TypeOf(v); + return switch (T) { + f32 => acos32(v), + F32x4, F32x8, F32x16 => acos32xN(v), + else => @compileError("zmath.acos() not implemented for " ++ @typeName(T)), + }; +} + +fn sincos32xN(v: anytype) [2]@TypeOf(v) { + const T = @TypeOf(v); + + var x = modAngle(v); + var sign = andInt(x, splatNegativeZero(T)); + const c = orInt(sign, splat(T, math.pi)); + const absx = andNotInt(sign, x); + const rflx = c - x; + const comp = absx <= splat(T, 0.5 * math.pi); + x = select(comp, x, rflx); + sign = select(comp, splat(T, 1.0), splat(T, -1.0)); + const x2 = x * x; + + var sresult = mulAdd(splat(T, -2.3889859e-08), x2, splat(T, 2.7525562e-06)); + sresult = mulAdd(sresult, x2, splat(T, -0.00019840874)); + sresult = mulAdd(sresult, x2, splat(T, 0.0083333310)); + sresult = mulAdd(sresult, x2, splat(T, -0.16666667)); + sresult = x * mulAdd(sresult, x2, splat(T, 1.0)); + + var cresult = mulAdd(splat(T, -2.6051615e-07), x2, splat(T, 2.4760495e-05)); + cresult = mulAdd(cresult, x2, splat(T, -0.0013888378)); + cresult = mulAdd(cresult, x2, splat(T, 0.041666638)); + cresult = mulAdd(cresult, x2, splat(T, -0.5)); + cresult = sign * mulAdd(cresult, x2, splat(T, 
1.0)); + + return .{ sresult, cresult }; +} +test "zmath.sincos32xN" { + const epsilon = 0.0001; + + var f: f32 = -100.0; + var i: u32 = 0; + while (i < 100) : (i += 1) { + const sc = sincos(splat(F32x4, f)); + const sc8 = sincos(splat(F32x8, f)); + const sc16 = sincos(splat(F32x16, f)); + const s4 = @sin(splat(F32x4, f)); + const s8 = @sin(splat(F32x8, f)); + const s16 = @sin(splat(F32x16, f)); + const c4 = @cos(splat(F32x4, f)); + const c8 = @cos(splat(F32x8, f)); + const c16 = @cos(splat(F32x16, f)); + try expect(approxEqAbs(sc[0], s4, epsilon)); + try expect(approxEqAbs(sc8[0], s8, epsilon)); + try expect(approxEqAbs(sc16[0], s16, epsilon)); + try expect(approxEqAbs(sc[1], c4, epsilon)); + try expect(approxEqAbs(sc8[1], c8, epsilon)); + try expect(approxEqAbs(sc16[1], c16, epsilon)); + f += 0.12345 * @intToFloat(f32, i); + } +} + +fn asin32xN(v: anytype) @TypeOf(v) { + // 7-degree minimax approximation + const T = @TypeOf(v); + + const x = abs(v); + const root = sqrt(maxFast(splat(T, 0.0), splat(T, 1.0) - x)); + + var t0 = mulAdd(splat(T, -0.0012624911), x, splat(T, 0.0066700901)); + t0 = mulAdd(t0, x, splat(T, -0.0170881256)); + t0 = mulAdd(t0, x, splat(T, 0.0308918810)); + t0 = mulAdd(t0, x, splat(T, -0.0501743046)); + t0 = mulAdd(t0, x, splat(T, 0.0889789874)); + t0 = mulAdd(t0, x, splat(T, -0.2145988016)); + t0 = root * mulAdd(t0, x, splat(T, 1.5707963050)); + + const t1 = splat(T, math.pi) - t0; + return splat(T, 0.5 * math.pi) - select(v >= splat(T, 0.0), t0, t1); +} + +fn acos32xN(v: anytype) @TypeOf(v) { + // 7-degree minimax approximation + const T = @TypeOf(v); + + const x = abs(v); + const root = sqrt(maxFast(splat(T, 0.0), splat(T, 1.0) - x)); + + var t0 = mulAdd(splat(T, -0.0012624911), x, splat(T, 0.0066700901)); + t0 = mulAdd(t0, x, splat(T, -0.0170881256)); + t0 = mulAdd(t0, x, splat(T, 0.0308918810)); + t0 = mulAdd(t0, x, splat(T, -0.0501743046)); + t0 = mulAdd(t0, x, splat(T, 0.0889789874)); + t0 = mulAdd(t0, x, splat(T, -0.2145988016)); + t0 
= root * mulAdd(t0, x, splat(T, 1.5707963050)); + + const t1 = splat(T, math.pi) - t0; + return select(v >= splat(T, 0.0), t0, t1); +} + +pub fn atan(v: anytype) @TypeOf(v) { + // 17-degree minimax approximation + const T = @TypeOf(v); + + const vabs = abs(v); + const vinv = splat(T, 1.0) / v; + var sign = select(v > splat(T, 1.0), splat(T, 1.0), splat(T, -1.0)); + const comp = vabs <= splat(T, 1.0); + sign = select(comp, splat(T, 0.0), sign); + const x = select(comp, v, vinv); + const x2 = x * x; + + var result = mulAdd(splat(T, 0.0028662257), x2, splat(T, -0.0161657367)); + result = mulAdd(result, x2, splat(T, 0.0429096138)); + result = mulAdd(result, x2, splat(T, -0.0752896400)); + result = mulAdd(result, x2, splat(T, 0.1065626393)); + result = mulAdd(result, x2, splat(T, -0.1420889944)); + result = mulAdd(result, x2, splat(T, 0.1999355085)); + result = mulAdd(result, x2, splat(T, -0.3333314528)); + result = x * mulAdd(result, x2, splat(T, 1.0)); + + const result1 = sign * splat(T, 0.5 * math.pi) - result; + return select(sign == splat(T, 0.0), result, result1); +} +test "zmath.atan" { + const epsilon = 0.0001; + { + const v = f32x4(0.25, 0.5, 1.0, 1.25); + const e = f32x4(math.atan(v[0]), math.atan(v[1]), math.atan(v[2]), math.atan(v[3])); + try expect(approxEqAbs(e, atan(v), epsilon)); + } + { + const v = f32x8(-0.25, 0.5, -1.0, 1.25, 100.0, -200.0, 300.0, 400.0); + // zig fmt: off + const e = f32x8( + math.atan(v[0]), math.atan(v[1]), math.atan(v[2]), math.atan(v[3]), + math.atan(v[4]), math.atan(v[5]), math.atan(v[6]), math.atan(v[7]), + ); + // zig fmt: on + try expect(approxEqAbs(e, atan(v), epsilon)); + } + { + // zig fmt: off + const v = f32x16( + -0.25, 0.5, -1.0, 0.0, 0.1, -0.2, 30.0, 400.0, + -0.25, 0.5, -1.0, -0.0, -0.05, -0.125, 0.0625, 4000.0 + ); + const e = f32x16( + math.atan(v[0]), math.atan(v[1]), math.atan(v[2]), math.atan(v[3]), + math.atan(v[4]), math.atan(v[5]), math.atan(v[6]), math.atan(v[7]), + math.atan(v[8]), math.atan(v[9]), 
math.atan(v[10]), math.atan(v[11]), + math.atan(v[12]), math.atan(v[13]), math.atan(v[14]), math.atan(v[15]), + ); + // zig fmt: on + try expect(approxEqAbs(e, atan(v), epsilon)); + } + { + try expect(approxEqAbs(atan(splat(F32x4, math.inf_f32)), splat(F32x4, 0.5 * math.pi), epsilon)); + try expect(approxEqAbs(atan(splat(F32x4, -math.inf_f32)), splat(F32x4, -0.5 * math.pi), epsilon)); + try expect(all(isNan(atan(splat(F32x4, math.nan_f32))), 0) == true); + try expect(all(isNan(atan(splat(F32x4, -math.nan_f32))), 0) == true); + } +} + +pub fn atan2(vy: anytype, vx: anytype) @TypeOf(vx, vy) { + const T = @TypeOf(vx, vy); + const Tu = @Vector(veclen(T), u32); + + const vx_is_positive = + (@bitCast(Tu, vx) & @splat(veclen(T), @as(u32, 0x8000_0000))) == @splat(veclen(T), @as(u32, 0)); + + const vy_sign = andInt(vy, splatNegativeZero(T)); + const c0_25pi = orInt(vy_sign, splat(T, 0.25 * math.pi)); + const c0_50pi = orInt(vy_sign, splat(T, 0.50 * math.pi)); + const c0_75pi = orInt(vy_sign, splat(T, 0.75 * math.pi)); + const c1_00pi = orInt(vy_sign, splat(T, 1.00 * math.pi)); + + var r1 = select(vx_is_positive, vy_sign, c1_00pi); + var r2 = select(vx == splat(T, 0.0), c0_50pi, splatInt(T, 0xffff_ffff)); + const r3 = select(vy == splat(T, 0.0), r1, r2); + const r4 = select(vx_is_positive, c0_25pi, c0_75pi); + const r5 = select(isInf(vx), r4, c0_50pi); + const result = select(isInf(vy), r5, r3); + const result_valid = @bitCast(Tu, result) == @splat(veclen(T), @as(u32, 0xffff_ffff)); + + const v = vy / vx; + const r0 = atan(v); + + r1 = select(vx_is_positive, splatNegativeZero(T), c1_00pi); + r2 = r0 + r1; + + return select(result_valid, r2, result); +} +test "zmath.atan2" { + // From DirectXMath XMVectorATan2(): + // + // Return the inverse tangent of Y / X in the range of -Pi to Pi with the following exceptions: + + // Y == 0 and X is Negative -> Pi with the sign of Y + // y == 0 and x is positive -> 0 with the sign of y + // Y != 0 and X == 0 -> Pi / 2 with the sign of Y + 
// Y != 0 and X is Negative -> atan(y/x) + (PI with the sign of Y) + // X == -Infinity and Finite Y -> Pi with the sign of Y + // X == +Infinity and Finite Y -> 0 with the sign of Y + // Y == Infinity and X is Finite -> Pi / 2 with the sign of Y + // Y == Infinity and X == -Infinity -> 3Pi / 4 with the sign of Y + // Y == Infinity and X == +Infinity -> Pi / 4 with the sign of Y + + const epsilon = 0.0001; + try expect(approxEqAbs(atan2(splat(F32x4, 0.0), splat(F32x4, -1.0)), splat(F32x4, math.pi), epsilon)); + try expect(approxEqAbs(atan2(splat(F32x4, -0.0), splat(F32x4, -1.0)), splat(F32x4, -math.pi), epsilon)); + try expect(approxEqAbs(atan2(splat(F32x4, 1.0), splat(F32x4, 0.0)), splat(F32x4, 0.5 * math.pi), epsilon)); + try expect(approxEqAbs(atan2(splat(F32x4, -1.0), splat(F32x4, 0.0)), splat(F32x4, -0.5 * math.pi), epsilon)); + try expect(approxEqAbs( + atan2(splat(F32x4, 1.0), splat(F32x4, -1.0)), + splat(F32x4, math.atan(@as(f32, -1.0)) + math.pi), + epsilon, + )); + try expect(approxEqAbs( + atan2(splat(F32x4, -10.0), splat(F32x4, -2.0)), + splat(F32x4, math.atan(@as(f32, 5.0)) - math.pi), + epsilon, + )); + try expect(approxEqAbs(atan2(splat(F32x4, 1.0), splat(F32x4, -math.inf_f32)), splat(F32x4, math.pi), epsilon)); + try expect(approxEqAbs(atan2(splat(F32x4, -1.0), splat(F32x4, -math.inf_f32)), splat(F32x4, -math.pi), epsilon)); + try expect(approxEqAbs(atan2(splat(F32x4, 1.0), splat(F32x4, math.inf_f32)), splat(F32x4, 0.0), epsilon)); + try expect(approxEqAbs(atan2(splat(F32x4, -1.0), splat(F32x4, math.inf_f32)), splat(F32x4, -0.0), epsilon)); + try expect(approxEqAbs( + atan2(splat(F32x4, math.inf_f32), splat(F32x4, 2.0)), + splat(F32x4, 0.5 * math.pi), + epsilon, + )); + try expect(approxEqAbs( + atan2(splat(F32x4, -math.inf_f32), splat(F32x4, 2.0)), + splat(F32x4, -0.5 * math.pi), + epsilon, + )); + try expect(approxEqAbs( + atan2(splat(F32x4, math.inf_f32), splat(F32x4, -math.inf_f32)), + splat(F32x4, 0.75 * math.pi), + epsilon, + )); + try 
expect(approxEqAbs( + atan2(splat(F32x4, -math.inf_f32), splat(F32x4, -math.inf_f32)), + splat(F32x4, -0.75 * math.pi), + epsilon, + )); + try expect(approxEqAbs( + atan2(splat(F32x4, math.inf_f32), splat(F32x4, math.inf_f32)), + splat(F32x4, 0.25 * math.pi), + epsilon, + )); + try expect(approxEqAbs( + atan2(splat(F32x4, -math.inf_f32), splat(F32x4, math.inf_f32)), + splat(F32x4, -0.25 * math.pi), + epsilon, + )); + try expect(approxEqAbs( + atan2( + f32x8(0.0, -math.inf_f32, -0.0, 2.0, math.inf_f32, math.inf_f32, 1.0, -math.inf_f32), + f32x8(-2.0, math.inf_f32, 1.0, 0.0, 10.0, -math.inf_f32, 1.0, -math.inf_f32), + ), + f32x8( + math.pi, + -0.25 * math.pi, + -0.0, + 0.5 * math.pi, + 0.5 * math.pi, + 0.75 * math.pi, + math.atan(@as(f32, 1.0)), + -0.75 * math.pi, + ), + epsilon, + )); + try expect(approxEqAbs(atan2(splat(F32x4, 0.0), splat(F32x4, 0.0)), splat(F32x4, 0.0), epsilon)); + try expect(approxEqAbs(atan2(splat(F32x4, -0.0), splat(F32x4, 0.0)), splat(F32x4, 0.0), epsilon)); + try expect(all(isNan(atan2(splat(F32x4, 1.0), splat(F32x4, math.nan_f32))), 0) == true); + try expect(all(isNan(atan2(splat(F32x4, -1.0), splat(F32x4, math.nan_f32))), 0) == true); + try expect(all(isNan(atan2(splat(F32x4, math.nan_f32), splat(F32x4, -1.0))), 0) == true); + try expect(all(isNan(atan2(splat(F32x4, -math.nan_f32), splat(F32x4, 1.0))), 0) == true); +} +// ------------------------------------------------------------------------------ +// +// 3. 
2D, 3D, 4D vector functions +// ------------------------------------------------------------------------------ +/// Dot product of the x and y lanes of `v0` and `v1`; the z and w lanes are ignored. +/// The sum is broadcast to all four lanes of the returned vector. +pub inline fn dot2(v0: Vec, v1: Vec) F32x4 { + var xmm0 = v0 * v1; // | x0*x1 | y0*y1 | -- | -- | + var xmm1 = swizzle(xmm0, .y, .x, .x, .x); // | y0*y1 | -- | -- | -- | + xmm0 = f32x4(xmm0[0] + xmm1[0], xmm0[1], xmm0[2], xmm0[3]); // | x0*x1 + y0*y1 | -- | -- | -- | + return swizzle(xmm0, .x, .x, .x, .x); // broadcast lane 0 to every lane +} +test "zmath.dot2" { + // The z and w lanes (300*600 and -2*2) must not contribute: -1*4 + 2*5 = 6. + const v0 = f32x4(-1.0, 2.0, 300.0, -2.0); + const v1 = f32x4(4.0, 5.0, 600.0, 2.0); + var v = dot2(v0, v1); + try expect(approxEqAbs(v, splat(F32x4, 6.0), 0.0001)); +} + +/// Dot product of the x, y and z lanes of `v0` and `v1`; the w lane is ignored. +/// The scalar sum is wrapped with `f32x4s` (defined elsewhere in this file); the test below expects the sum in every lane. +pub inline fn dot3(v0: Vec, v1: Vec) F32x4 { + const dot = v0 * v1; + return f32x4s(dot[0] + dot[1] + dot[2]); +} +test "zmath.dot3" { + // -1*4 + 2*5 + 3*6 = 24; the w lanes (1*1) are not part of the sum. + const v0 = f32x4(-1.0, 2.0, 3.0, 1.0); + const v1 = f32x4(4.0, 5.0, 6.0, 1.0); + var v = dot3(v0, v1); + try expect(approxEqAbs(v, splat(F32x4, 24.0), 0.0001)); +} + +/// Dot product over all four lanes of `v0` and `v1`. +/// The sum is broadcast to all four lanes of the returned vector. +pub inline fn dot4(v0: Vec, v1: Vec) F32x4 { + var xmm0 = v0 * v1; // | x0*x1 | y0*y1 | z0*z1 | w0*w1 | + var xmm1 = swizzle(xmm0, .y, .x, .w, .x); // | y0*y1 | -- | w0*w1 | -- | + xmm1 = xmm0 + xmm1; // | x0*x1 + y0*y1 | -- | z0*z1 + w0*w1 | -- | + xmm0 = swizzle(xmm1, .z, .x, .x, .x); // | z0*z1 + w0*w1 | -- | -- | -- | + xmm0 = f32x4(xmm0[0] + xmm1[0], xmm0[1], xmm0[2], xmm0[2]); // addss; lanes 1-3 are discarded by the broadcast below + return swizzle(xmm0, .x, .x, .x, .x); // broadcast lane 0 to every lane +} +test "zmath.dot4" { + // -1*4 + 2*5 + 3*6 + -2*2 = 20. + const v0 = f32x4(-1.0, 2.0, 3.0, -2.0); + const v1 = f32x4(4.0, 5.0, 6.0, 2.0); + var v = dot4(v0, v1); + try expect(approxEqAbs(v, splat(F32x4, 20.0), 0.0001)); +} + +/// Cross product of the xyz parts of `v0` and `v1`. +/// The result is ANDed with `f32x4_mask3` (defined elsewhere in this file); the tests below expect w == 0 in the output. +pub inline fn cross3(v0: Vec, v1: Vec) Vec { + var xmm0 = swizzle(v0, .y, .z, .x, .w); // | y0 | z0 | x0 | w0 | + var xmm1 = swizzle(v1, .z, .x, .y, .w); // | z1 | x1 | y1 | w1 | + var result = xmm0 * xmm1; // | y0*z1 | z0*x1 | x0*y1 | -- | + xmm0 = swizzle(xmm0, .y, .z, .x, .w); // | z0 | x0 | y0 | w0 | + xmm1 = swizzle(xmm1, .z, .x, .y, .w); // | y1 | z1 | x1 | w1 | + result = result - xmm0 * xmm1; // | y0*z1 - z0*y1 | z0*x1 - x0*z1 | x0*y1 - y0*x1 | -- | + return andInt(result, f32x4_mask3); +} +test "zmath.cross3" { + { + const v0 = f32x4(1.0, 0.0, 0.0, 1.0); + const v1 = f32x4(0.0, 1.0, 0.0, 1.0); + var v = cross3(v0, v1); + 
try expect(approxEqAbs(v, f32x4(0.0, 0.0, 1.0, 0.0), 0.0001)); + } + { + const v0 = f32x4(1.0, 0.0, 0.0, 1.0); + const v1 = f32x4(0.0, -1.0, 0.0, 1.0); + var v = cross3(v0, v1); + try expect(approxEqAbs(v, f32x4(0.0, 0.0, -1.0, 0.0), 0.0001)); + } + { + const v0 = f32x4(-3.0, 0, -2.0, 1.0); + const v1 = f32x4(5.0, -1.0, 2.0, 1.0); + var v = cross3(v0, v1); + try expect(approxEqAbs(v, f32x4(-2.0, -4.0, 3.0, 0.0), 0.0001)); + } +} + +pub inline fn lengthSq2(v: Vec) F32x4 { + return dot2(v, v); +} +pub inline fn lengthSq3(v: Vec) F32x4 { + return dot3(v, v); +} +pub inline fn lengthSq4(v: Vec) F32x4 { + return dot4(v, v); +} + +pub inline fn length2(v: Vec) F32x4 { + return sqrt(dot2(v, v)); +} +pub inline fn length3(v: Vec) F32x4 { + return sqrt(dot3(v, v)); +} +pub inline fn length4(v: Vec) F32x4 { + return sqrt(dot4(v, v)); +} +test "zmath.length3" { + if (builtin.target.os.tag == .macos and builtin.zig_backend != .stage1) return error.SkipZigTest; + { + const v = length3(f32x4(1.0, -2.0, 3.0, 1000.0)); + try expect(approxEqAbs(v, splat(F32x4, math.sqrt(14.0)), 0.001)); + } + { + const v = length3(f32x4(1.0, math.nan_f32, math.nan_f32, 1000.0)); + try expect(all(isNan(v), 0)); + } + { + const v = length3(f32x4(1.0, math.inf_f32, 3.0, 1000.0)); + try expect(all(isInf(v), 0)); + } + { + const v = length3(f32x4(3.0, 2.0, 1.0, math.nan_f32)); + try expect(approxEqAbs(v, splat(F32x4, math.sqrt(14.0)), 0.001)); + } +} + +pub inline fn normalize2(v: Vec) Vec { + return v * splat(F32x4, 1.0) / sqrt(dot2(v, v)); +} +pub inline fn normalize3(v: Vec) Vec { + return v * splat(F32x4, 1.0) / sqrt(dot3(v, v)); +} +pub inline fn normalize4(v: Vec) Vec { + return v * splat(F32x4, 1.0) / sqrt(dot4(v, v)); +} +test "zmath.normalize3" { + { + const v0 = f32x4(1.0, -2.0, 3.0, 1000.0); + var v = normalize3(v0); + try expect(approxEqAbs(v, v0 * splat(F32x4, 1.0 / math.sqrt(14.0)), 0.0005)); + } + { + try expect(any(isNan(normalize3(f32x4(1.0, math.inf_f32, 1.0, 1.0))), 0)); + try 
expect(any(isNan(normalize3(f32x4(-math.inf_f32, math.inf_f32, 0.0, 0.0))), 0)); + try expect(any(isNan(normalize3(f32x4(-math.nan_f32, math.qnan_f32, 0.0, 0.0))), 0)); + try expect(any(isNan(normalize3(f32x4(0, 0, 0, 0))), 0)); + } +} +test "zmath.normalize4" { + { + const v0 = f32x4(1.0, -2.0, 3.0, 10.0); + var v = normalize4(v0); + try expect(approxEqAbs(v, v0 * splat(F32x4, 1.0 / math.sqrt(114.0)), 0.0005)); + } + { + try expect(any(isNan(normalize4(f32x4(1.0, math.inf_f32, 1.0, 1.0))), 0)); + try expect(any(isNan(normalize4(f32x4(-math.inf_f32, math.inf_f32, 0.0, 0.0))), 0)); + try expect(any(isNan(normalize4(f32x4(-math.nan_f32, math.qnan_f32, 0.0, 0.0))), 0)); + try expect(any(isNan(normalize4(f32x4(0, 0, 0, 0))), 0)); + } +} + +fn vecMulMat(v: Vec, m: Mat) Vec { + var vx = @shuffle(f32, v, undefined, [4]i32{ 0, 0, 0, 0 }); + var vy = @shuffle(f32, v, undefined, [4]i32{ 1, 1, 1, 1 }); + var vz = @shuffle(f32, v, undefined, [4]i32{ 2, 2, 2, 2 }); + var vw = @shuffle(f32, v, undefined, [4]i32{ 3, 3, 3, 3 }); + return vx * m[0] + vy * m[1] + vz * m[2] + vw * m[3]; +} +fn matMulVec(m: Mat, v: Vec) Vec { + return .{ dot4(m[0], v)[0], dot4(m[1], v)[0], dot4(m[2], v)[0], dot4(m[3], v)[0] }; +} +test "zmath.vecMulMat" { + const m = Mat{ + f32x4(1.0, 0.0, 0.0, 0.0), + f32x4(0.0, 1.0, 0.0, 0.0), + f32x4(0.0, 0.0, 1.0, 0.0), + f32x4(2.0, 3.0, 4.0, 1.0), + }; + const vm = mul(f32x4(1.0, 2.0, 3.0, 1.0), m); + const mv = mul(m, f32x4(1.0, 2.0, 3.0, 1.0)); + const v = mul(transpose(m), f32x4(1.0, 2.0, 3.0, 1.0)); + try expect(approxEqAbs(vm, f32x4(3.0, 5.0, 7.0, 1.0), 0.0001)); + try expect(approxEqAbs(mv, f32x4(1.0, 2.0, 3.0, 21.0), 0.0001)); + try expect(approxEqAbs(v, f32x4(3.0, 5.0, 7.0, 1.0), 0.0001)); +} +// ------------------------------------------------------------------------------ +// +// 4. 
Matrix functions +// +// ------------------------------------------------------------------------------ +pub fn identity() Mat { + const static = struct { + const identity = Mat{ + f32x4(1.0, 0.0, 0.0, 0.0), + f32x4(0.0, 1.0, 0.0, 0.0), + f32x4(0.0, 0.0, 1.0, 0.0), + f32x4(0.0, 0.0, 0.0, 1.0), + }; + }; + return static.identity; +} + +fn mulRetType(comptime Ta: type, comptime Tb: type) type { + if (Ta == Mat and Tb == Mat) { + return Mat; + } else if ((Ta == f32 and Tb == Mat) or (Ta == Mat and Tb == f32)) { + return Mat; + } else if ((Ta == Vec and Tb == Mat) or (Ta == Mat and Tb == Vec)) { + return Vec; + } + @compileError("zmath.mul() not implemented for types: " ++ @typeName(Ta) ++ @typeName(Tb)); +} + +pub fn mul(a: anytype, b: anytype) mulRetType(@TypeOf(a), @TypeOf(b)) { + const Ta = @TypeOf(a); + const Tb = @TypeOf(b); + if (Ta == Mat and Tb == Mat) { + return mulMat(a, b); + } else if (Ta == f32 and Tb == Mat) { + const va = splat(F32x4, a); + return Mat{ va * b[0], va * b[1], va * b[2], va * b[3] }; + } else if (Ta == Mat and Tb == f32) { + const vb = splat(F32x4, b); + return Mat{ a[0] * vb, a[1] * vb, a[2] * vb, a[3] * vb }; + } else if (Ta == Vec and Tb == Mat) { + return vecMulMat(a, b); + } else if (Ta == Mat and Tb == Vec) { + return matMulVec(a, b); + } else { + @compileError("zmath.mul() not implemented for types: " ++ @typeName(Ta) ++ ", " ++ @typeName(Tb)); + } +} +test "zmath.mul" { + { + const m = Mat{ + f32x4(0.1, 0.2, 0.3, 0.4), + f32x4(0.5, 0.6, 0.7, 0.8), + f32x4(0.9, 1.0, 1.1, 1.2), + f32x4(1.3, 1.4, 1.5, 1.6), + }; + const ms = mul(@as(f32, 2.0), m); + try expect(approxEqAbs(ms[0], f32x4(0.2, 0.4, 0.6, 0.8), 0.0001)); + try expect(approxEqAbs(ms[1], f32x4(1.0, 1.2, 1.4, 1.6), 0.0001)); + try expect(approxEqAbs(ms[2], f32x4(1.8, 2.0, 2.2, 2.4), 0.0001)); + try expect(approxEqAbs(ms[3], f32x4(2.6, 2.8, 3.0, 3.2), 0.0001)); + } +} + +fn mulMat(m0: Mat, m1: Mat) Mat { + var result: Mat = undefined; + comptime var row: u32 = 0; + inline while 
(row < 4) : (row += 1) { + const vx = swizzle(m0[row], .x, .x, .x, .x); + const vy = swizzle(m0[row], .y, .y, .y, .y); + const vz = swizzle(m0[row], .z, .z, .z, .z); + const vw = swizzle(m0[row], .w, .w, .w, .w); + result[row] = mulAdd(vx, m1[0], vz * m1[2]) + mulAdd(vy, m1[1], vw * m1[3]); + } + return result; +} +test "zmath.matrix.mul" { + const a = Mat{ + f32x4(0.1, 0.2, 0.3, 0.4), + f32x4(0.5, 0.6, 0.7, 0.8), + f32x4(0.9, 1.0, 1.1, 1.2), + f32x4(1.3, 1.4, 1.5, 1.6), + }; + const b = Mat{ + f32x4(1.7, 1.8, 1.9, 2.0), + f32x4(2.1, 2.2, 2.3, 2.4), + f32x4(2.5, 2.6, 2.7, 2.8), + f32x4(2.9, 3.0, 3.1, 3.2), + }; + const c = mul(a, b); + try expect(approxEqAbs(c[0], f32x4(2.5, 2.6, 2.7, 2.8), 0.0001)); + try expect(approxEqAbs(c[1], f32x4(6.18, 6.44, 6.7, 6.96), 0.0001)); + try expect(approxEqAbs(c[2], f32x4(9.86, 10.28, 10.7, 11.12), 0.0001)); + try expect(approxEqAbs(c[3], f32x4(13.54, 14.12, 14.7, 15.28), 0.0001)); +} + +pub fn transpose(m: Mat) Mat { + const temp1 = @shuffle(f32, m[0], m[1], [4]i32{ 0, 1, ~@as(i32, 0), ~@as(i32, 1) }); + const temp3 = @shuffle(f32, m[0], m[1], [4]i32{ 2, 3, ~@as(i32, 2), ~@as(i32, 3) }); + const temp2 = @shuffle(f32, m[2], m[3], [4]i32{ 0, 1, ~@as(i32, 0), ~@as(i32, 1) }); + const temp4 = @shuffle(f32, m[2], m[3], [4]i32{ 2, 3, ~@as(i32, 2), ~@as(i32, 3) }); + return .{ + @shuffle(f32, temp1, temp2, [4]i32{ 0, 2, ~@as(i32, 0), ~@as(i32, 2) }), + @shuffle(f32, temp1, temp2, [4]i32{ 1, 3, ~@as(i32, 1), ~@as(i32, 3) }), + @shuffle(f32, temp3, temp4, [4]i32{ 0, 2, ~@as(i32, 0), ~@as(i32, 2) }), + @shuffle(f32, temp3, temp4, [4]i32{ 1, 3, ~@as(i32, 1), ~@as(i32, 3) }), + }; +} +test "zmath.matrix.transpose" { + const m = Mat{ + f32x4(1.0, 2.0, 3.0, 4.0), + f32x4(5.0, 6.0, 7.0, 8.0), + f32x4(9.0, 10.0, 11.0, 12.0), + f32x4(13.0, 14.0, 15.0, 16.0), + }; + const mt = transpose(m); + try expect(approxEqAbs(mt[0], f32x4(1.0, 5.0, 9.0, 13.0), 0.0001)); + try expect(approxEqAbs(mt[1], f32x4(2.0, 6.0, 10.0, 14.0), 0.0001)); + try 
expect(approxEqAbs(mt[2], f32x4(3.0, 7.0, 11.0, 15.0), 0.0001)); + try expect(approxEqAbs(mt[3], f32x4(4.0, 8.0, 12.0, 16.0), 0.0001)); +} + +pub fn rotationX(angle: f32) Mat { + const sc = sincos(angle); + return .{ + f32x4(1.0, 0.0, 0.0, 0.0), + f32x4(0.0, sc[1], sc[0], 0.0), + f32x4(0.0, -sc[0], sc[1], 0.0), + f32x4(0.0, 0.0, 0.0, 1.0), + }; +} + +pub fn rotationY(angle: f32) Mat { + const sc = sincos(angle); + return .{ + f32x4(sc[1], 0.0, -sc[0], 0.0), + f32x4(0.0, 1.0, 0.0, 0.0), + f32x4(sc[0], 0.0, sc[1], 0.0), + f32x4(0.0, 0.0, 0.0, 1.0), + }; +} + +pub fn rotationZ(angle: f32) Mat { + const sc = sincos(angle); + return .{ + f32x4(sc[1], sc[0], 0.0, 0.0), + f32x4(-sc[0], sc[1], 0.0, 0.0), + f32x4(0.0, 0.0, 1.0, 0.0), + f32x4(0.0, 0.0, 0.0, 1.0), + }; +} + +pub fn translation(x: f32, y: f32, z: f32) Mat { + return .{ + f32x4(1.0, 0.0, 0.0, 0.0), + f32x4(0.0, 1.0, 0.0, 0.0), + f32x4(0.0, 0.0, 1.0, 0.0), + f32x4(x, y, z, 1.0), + }; +} +pub fn translationV(v: Vec) Mat { + return translation(v[0], v[1], v[2]); +} + +pub fn scaling(x: f32, y: f32, z: f32) Mat { + return .{ + f32x4(x, 0.0, 0.0, 0.0), + f32x4(0.0, y, 0.0, 0.0), + f32x4(0.0, 0.0, z, 0.0), + f32x4(0.0, 0.0, 0.0, 1.0), + }; +} +pub fn scalingV(v: Vec) Mat { + return scaling(v[0], v[1], v[2]); +} + +pub fn lookToLh(eyepos: Vec, eyedir: Vec, updir: Vec) Mat { + const az = normalize3(eyedir); + const ax = normalize3(cross3(updir, az)); + const ay = normalize3(cross3(az, ax)); + return transpose(.{ + f32x4(ax[0], ax[1], ax[2], -dot3(ax, eyepos)[0]), + f32x4(ay[0], ay[1], ay[2], -dot3(ay, eyepos)[0]), + f32x4(az[0], az[1], az[2], -dot3(az, eyepos)[0]), + f32x4(0.0, 0.0, 0.0, 1.0), + }); +} +pub fn lookToRh(eyepos: Vec, eyedir: Vec, updir: Vec) Mat { + return lookToLh(eyepos, -eyedir, updir); +} +pub fn lookAtLh(eyepos: Vec, focuspos: Vec, updir: Vec) Mat { + return lookToLh(eyepos, focuspos - eyepos, updir); +} +pub fn lookAtRh(eyepos: Vec, focuspos: Vec, updir: Vec) Mat { + return lookToLh(eyepos, eyepos 
- focuspos, updir); +} +test "zmath.matrix.lookToLh" { + const m = lookToLh(f32x4(0.0, 0.0, -3.0, 1.0), f32x4(0.0, 0.0, 1.0, 0.0), f32x4(0.0, 1.0, 0.0, 0.0)); + try expect(approxEqAbs(m[0], f32x4(1.0, 0.0, 0.0, 0.0), 0.001)); + try expect(approxEqAbs(m[1], f32x4(0.0, 1.0, 0.0, 0.0), 0.001)); + try expect(approxEqAbs(m[2], f32x4(0.0, 0.0, 1.0, 0.0), 0.001)); + try expect(approxEqAbs(m[3], f32x4(0.0, 0.0, 3.0, 1.0), 0.001)); +} + +pub fn perspectiveFovLh(fovy: f32, aspect: f32, near: f32, far: f32) Mat { + const scfov = sincos(0.5 * fovy); + + assert(near > 0.0 and far > 0.0 and far > near); + assert(!math.approxEqAbs(f32, scfov[0], 0.0, 0.001)); + assert(!math.approxEqAbs(f32, far, near, 0.001)); + assert(!math.approxEqAbs(f32, aspect, 0.0, 0.01)); + + const h = scfov[1] / scfov[0]; + const w = h / aspect; + const r = far / (far - near); + return .{ + f32x4(w, 0.0, 0.0, 0.0), + f32x4(0.0, h, 0.0, 0.0), + f32x4(0.0, 0.0, r, 1.0), + f32x4(0.0, 0.0, -r * near, 0.0), + }; +} +pub fn perspectiveFovRh(fovy: f32, aspect: f32, near: f32, far: f32) Mat { + const scfov = sincos(0.5 * fovy); + + assert(near > 0.0 and far > 0.0 and far > near); + assert(!math.approxEqAbs(f32, scfov[0], 0.0, 0.001)); + assert(!math.approxEqAbs(f32, far, near, 0.001)); + assert(!math.approxEqAbs(f32, aspect, 0.0, 0.01)); + + const h = scfov[1] / scfov[0]; + const w = h / aspect; + const r = far / (near - far); + return .{ + f32x4(w, 0.0, 0.0, 0.0), + f32x4(0.0, h, 0.0, 0.0), + f32x4(0.0, 0.0, r, -1.0), + f32x4(0.0, 0.0, r * near, 0.0), + }; +} + +// Produces Z values in [-1.0, 1.0] range (OpenGL defaults) +pub fn perspectiveFovLhGl(fovy: f32, aspect: f32, near: f32, far: f32) Mat { + const scfov = sincos(0.5 * fovy); + + assert(near > 0.0 and far > 0.0 and far > near); + assert(!math.approxEqAbs(f32, scfov[0], 0.0, 0.001)); + assert(!math.approxEqAbs(f32, far, near, 0.001)); + assert(!math.approxEqAbs(f32, aspect, 0.0, 0.01)); + + const h = scfov[1] / scfov[0]; + const w = h / aspect; + const r = 
far - near; + return .{ + f32x4(w, 0.0, 0.0, 0.0), + f32x4(0.0, h, 0.0, 0.0), + f32x4(0.0, 0.0, (near + far) / r, 1.0), + f32x4(0.0, 0.0, 2.0 * near * far / -r, 0.0), + }; +} + +// Produces Z values in [-1.0, 1.0] range (OpenGL defaults) +pub fn perspectiveFovRhGl(fovy: f32, aspect: f32, near: f32, far: f32) Mat { + const scfov = sincos(0.5 * fovy); + + assert(near > 0.0 and far > 0.0 and far > near); + assert(!math.approxEqAbs(f32, scfov[0], 0.0, 0.001)); + assert(!math.approxEqAbs(f32, far, near, 0.001)); + assert(!math.approxEqAbs(f32, aspect, 0.0, 0.01)); + + const h = scfov[1] / scfov[0]; + const w = h / aspect; + const r = near - far; + return .{ + f32x4(w, 0.0, 0.0, 0.0), + f32x4(0.0, h, 0.0, 0.0), + f32x4(0.0, 0.0, (near + far) / r, -1.0), + f32x4(0.0, 0.0, 2.0 * near * far / r, 0.0), + }; +} + +pub fn orthographicLh(w: f32, h: f32, near: f32, far: f32) Mat { + assert(!math.approxEqAbs(f32, w, 0.0, 0.001)); + assert(!math.approxEqAbs(f32, h, 0.0, 0.001)); + assert(!math.approxEqAbs(f32, far, near, 0.001)); + + const r = 1 / (far - near); + return .{ + f32x4(2 / w, 0.0, 0.0, 0.0), + f32x4(0.0, 2 / h, 0.0, 0.0), + f32x4(0.0, 0.0, r, 0.0), + f32x4(0.0, 0.0, -r * near, 1.0), + }; +} + +pub fn orthographicRh(w: f32, h: f32, near: f32, far: f32) Mat { + assert(!math.approxEqAbs(f32, w, 0.0, 0.001)); + assert(!math.approxEqAbs(f32, h, 0.0, 0.001)); + assert(!math.approxEqAbs(f32, far, near, 0.001)); + + const r = 1 / (near - far); + return .{ + f32x4(2 / w, 0.0, 0.0, 0.0), + f32x4(0.0, 2 / h, 0.0, 0.0), + f32x4(0.0, 0.0, r, 0.0), + f32x4(0.0, 0.0, r * near, 1.0), + }; +} + +// Produces Z values in [-1.0, 1.0] range (OpenGL defaults) +pub fn orthographicLhGl(w: f32, h: f32, near: f32, far: f32) Mat { + assert(!math.approxEqAbs(f32, w, 0.0, 0.001)); + assert(!math.approxEqAbs(f32, h, 0.0, 0.001)); + assert(!math.approxEqAbs(f32, far, near, 0.001)); + + const r = far - near; + return .{ + f32x4(2 / w, 0.0, 0.0, 0.0), + f32x4(0.0, 2 / h, 0.0, 0.0), + f32x4(0.0, 0.0, 2 
/ r, 0.0), + f32x4(0.0, 0.0, (near + far) / -r, 1.0), + }; +} + +// Produces Z values in [-1.0, 1.0] range (OpenGL defaults) +pub fn orthographicRhGl(w: f32, h: f32, near: f32, far: f32) Mat { + assert(!math.approxEqAbs(f32, w, 0.0, 0.001)); + assert(!math.approxEqAbs(f32, h, 0.0, 0.001)); + assert(!math.approxEqAbs(f32, far, near, 0.001)); + + const r = near - far; + return .{ + f32x4(2 / w, 0.0, 0.0, 0.0), + f32x4(0.0, 2 / h, 0.0, 0.0), + f32x4(0.0, 0.0, 2 / r, 0.0), + f32x4(0.0, 0.0, (near + far) / r, 1.0), + }; +} + +pub fn orthographicOffCenterLh(left: f32, right: f32, top: f32, bottom: f32, near: f32, far: f32) Mat { + assert(!math.approxEqAbs(f32, far, near, 0.001)); + + const r = 1 / (far - near); + return .{ + f32x4(2 / (right - left), 0.0, 0.0, 0.0), + f32x4(0.0, 2 / (top - bottom), 0.0, 0.0), + f32x4(0.0, 0.0, r, 0.0), + f32x4(-(right + left) / (right - left), -(top + bottom) / (top - bottom), -r * near, 1.0), + }; +} + +pub fn orthographicOffCenterRh(left: f32, right: f32, top: f32, bottom: f32, near: f32, far: f32) Mat { + assert(!math.approxEqAbs(f32, far, near, 0.001)); + + const r = 1 / (near - far); + return .{ + f32x4(2 / (right - left), 0.0, 0.0, 0.0), + f32x4(0.0, 2 / (top - bottom), 0.0, 0.0), + f32x4(0.0, 0.0, r, 0.0), + f32x4(-(right + left) / (right - left), -(top + bottom) / (top - bottom), r * near, 1.0), + }; +} + +// Produces Z values in [-1.0, 1.0] range (OpenGL defaults) +pub fn orthographicOffCenterLhGl(left: f32, right: f32, top: f32, bottom: f32, near: f32, far: f32) Mat { + assert(!math.approxEqAbs(f32, far, near, 0.001)); + + const r = far - near; + return .{ + f32x4(2 / (right - left), 0.0, 0.0, 0.0), + f32x4(0.0, 2 / (top - bottom), 0.0, 0.0), + f32x4(0.0, 0.0, 2 / r, 0.0), + f32x4(-(right + left) / (right - left), -(top + bottom) / (top - bottom), (near + far) / -r, 1.0), + }; +} + +// Produces Z values in [-1.0, 1.0] range (OpenGL defaults) +pub fn orthographicOffCenterRhGl(left: f32, right: f32, top: f32, bottom: f32, near: 
f32, far: f32) Mat { + assert(!math.approxEqAbs(f32, far, near, 0.001)); + + const r = near - far; + return .{ + f32x4(2 / (right - left), 0.0, 0.0, 0.0), + f32x4(0.0, 2 / (top - bottom), 0.0, 0.0), + f32x4(0.0, 0.0, 2 / r, 0.0), + f32x4(-(right + left) / (right - left), -(top + bottom) / (top - bottom), (near + far) / r, 1.0), + }; +} + +pub fn determinant(m: Mat) F32x4 { + var v0 = swizzle(m[2], .y, .x, .x, .x); + var v1 = swizzle(m[3], .z, .z, .y, .y); + var v2 = swizzle(m[2], .y, .x, .x, .x); + var v3 = swizzle(m[3], .w, .w, .w, .z); + var v4 = swizzle(m[2], .z, .z, .y, .y); + var v5 = swizzle(m[3], .w, .w, .w, .z); + + var p0 = v0 * v1; + var p1 = v2 * v3; + var p2 = v4 * v5; + + v0 = swizzle(m[2], .z, .z, .y, .y); + v1 = swizzle(m[3], .y, .x, .x, .x); + v2 = swizzle(m[2], .w, .w, .w, .z); + v3 = swizzle(m[3], .y, .x, .x, .x); + v4 = swizzle(m[2], .w, .w, .w, .z); + v5 = swizzle(m[3], .z, .z, .y, .y); + + p0 = mulAdd(-v0, v1, p0); + p1 = mulAdd(-v2, v3, p1); + p2 = mulAdd(-v4, v5, p2); + + v0 = swizzle(m[1], .w, .w, .w, .z); + v1 = swizzle(m[1], .z, .z, .y, .y); + v2 = swizzle(m[1], .y, .x, .x, .x); + + var s = m[0] * f32x4(1.0, -1.0, 1.0, -1.0); + var r = v0 * p0; + r = mulAdd(-v1, p1, r); + r = mulAdd(v2, p2, r); + return dot4(s, r); +} +test "zmath.matrix.determinant" { + const m = Mat{ + f32x4(10.0, -9.0, -12.0, 1.0), + f32x4(7.0, -12.0, 11.0, 1.0), + f32x4(-10.0, 10.0, 3.0, 1.0), + f32x4(1.0, 2.0, 3.0, 4.0), + }; + try expect(approxEqAbs(determinant(m), splat(F32x4, 2939.0), 0.0001)); +} + +pub fn inverse(a: anytype) @TypeOf(a) { + const T = @TypeOf(a); + return switch (T) { + Mat => inverseMat(a), + Quat => inverseQuat(a), + else => @compileError("zmath.inverse() not implemented for " ++ @typeName(T)), + }; +} + +fn inverseMat(m: Mat) Mat { + return inverseDet(m, null); +} + +pub fn inverseDet(m: Mat, out_det: ?*F32x4) Mat { + const mt = transpose(m); + var v0: [4]F32x4 = undefined; + var v1: [4]F32x4 = undefined; + + v0[0] = swizzle(mt[2], .x, .x, .y, 
.y); + v1[0] = swizzle(mt[3], .z, .w, .z, .w); + v0[1] = swizzle(mt[0], .x, .x, .y, .y); + v1[1] = swizzle(mt[1], .z, .w, .z, .w); + v0[2] = @shuffle(f32, mt[2], mt[0], [4]i32{ 0, 2, ~@as(i32, 0), ~@as(i32, 2) }); + v1[2] = @shuffle(f32, mt[3], mt[1], [4]i32{ 1, 3, ~@as(i32, 1), ~@as(i32, 3) }); + + var d0 = v0[0] * v1[0]; + var d1 = v0[1] * v1[1]; + var d2 = v0[2] * v1[2]; + + v0[0] = swizzle(mt[2], .z, .w, .z, .w); + v1[0] = swizzle(mt[3], .x, .x, .y, .y); + v0[1] = swizzle(mt[0], .z, .w, .z, .w); + v1[1] = swizzle(mt[1], .x, .x, .y, .y); + v0[2] = @shuffle(f32, mt[2], mt[0], [4]i32{ 1, 3, ~@as(i32, 1), ~@as(i32, 3) }); + v1[2] = @shuffle(f32, mt[3], mt[1], [4]i32{ 0, 2, ~@as(i32, 0), ~@as(i32, 2) }); + + d0 = mulAdd(-v0[0], v1[0], d0); + d1 = mulAdd(-v0[1], v1[1], d1); + d2 = mulAdd(-v0[2], v1[2], d2); + + v0[0] = swizzle(mt[1], .y, .z, .x, .y); + v1[0] = @shuffle(f32, d0, d2, [4]i32{ ~@as(i32, 1), 1, 3, 0 }); + v0[1] = swizzle(mt[0], .z, .x, .y, .x); + v1[1] = @shuffle(f32, d0, d2, [4]i32{ 3, ~@as(i32, 1), 1, 2 }); + v0[2] = swizzle(mt[3], .y, .z, .x, .y); + v1[2] = @shuffle(f32, d1, d2, [4]i32{ ~@as(i32, 3), 1, 3, 0 }); + v0[3] = swizzle(mt[2], .z, .x, .y, .x); + v1[3] = @shuffle(f32, d1, d2, [4]i32{ 3, ~@as(i32, 3), 1, 2 }); + + var c0 = v0[0] * v1[0]; + var c2 = v0[1] * v1[1]; + var c4 = v0[2] * v1[2]; + var c6 = v0[3] * v1[3]; + + v0[0] = swizzle(mt[1], .z, .w, .y, .z); + v1[0] = @shuffle(f32, d0, d2, [4]i32{ 3, 0, 1, ~@as(i32, 0) }); + v0[1] = swizzle(mt[0], .w, .z, .w, .y); + v1[1] = @shuffle(f32, d0, d2, [4]i32{ 2, 1, ~@as(i32, 0), 0 }); + v0[2] = swizzle(mt[3], .z, .w, .y, .z); + v1[2] = @shuffle(f32, d1, d2, [4]i32{ 3, 0, 1, ~@as(i32, 2) }); + v0[3] = swizzle(mt[2], .w, .z, .w, .y); + v1[3] = @shuffle(f32, d1, d2, [4]i32{ 2, 1, ~@as(i32, 2), 0 }); + + c0 = mulAdd(-v0[0], v1[0], c0); + c2 = mulAdd(-v0[1], v1[1], c2); + c4 = mulAdd(-v0[2], v1[2], c4); + c6 = mulAdd(-v0[3], v1[3], c6); + + v0[0] = swizzle(mt[1], .w, .x, .w, .x); + v1[0] = @shuffle(f32, 
d0, d2, [4]i32{ 2, ~@as(i32, 1), ~@as(i32, 0), 2 }); + v0[1] = swizzle(mt[0], .y, .w, .x, .z); + v1[1] = @shuffle(f32, d0, d2, [4]i32{ ~@as(i32, 1), 0, 3, ~@as(i32, 0) }); + v0[2] = swizzle(mt[3], .w, .x, .w, .x); + v1[2] = @shuffle(f32, d1, d2, [4]i32{ 2, ~@as(i32, 3), ~@as(i32, 2), 2 }); + v0[3] = swizzle(mt[2], .y, .w, .x, .z); + v1[3] = @shuffle(f32, d1, d2, [4]i32{ ~@as(i32, 3), 0, 3, ~@as(i32, 2) }); + + const c1 = mulAdd(-v0[0], v1[0], c0); + const c3 = mulAdd(v0[1], v1[1], c2); + const c5 = mulAdd(-v0[2], v1[2], c4); + const c7 = mulAdd(v0[3], v1[3], c6); + + c0 = mulAdd(v0[0], v1[0], c0); + c2 = mulAdd(-v0[1], v1[1], c2); + c4 = mulAdd(v0[2], v1[2], c4); + c6 = mulAdd(-v0[3], v1[3], c6); + + var mr = Mat{ + f32x4(c0[0], c1[1], c0[2], c1[3]), + f32x4(c2[0], c3[1], c2[2], c3[3]), + f32x4(c4[0], c5[1], c4[2], c5[3]), + f32x4(c6[0], c7[1], c6[2], c7[3]), + }; + + const det = dot4(mr[0], mt[0]); + if (out_det != null) { + out_det.?.* = det; + } + + if (math.approxEqAbs(f32, det[0], 0.0, math.f32_epsilon)) { + return .{ + f32x4(0.0, 0.0, 0.0, 0.0), + f32x4(0.0, 0.0, 0.0, 0.0), + f32x4(0.0, 0.0, 0.0, 0.0), + f32x4(0.0, 0.0, 0.0, 0.0), + }; + } + + const scale = splat(F32x4, 1.0) / det; + mr[0] *= scale; + mr[1] *= scale; + mr[2] *= scale; + mr[3] *= scale; + return mr; +} +test "zmath.matrix.inverse" { + const m = Mat{ + f32x4(10.0, -9.0, -12.0, 1.0), + f32x4(7.0, -12.0, 11.0, 1.0), + f32x4(-10.0, 10.0, 3.0, 1.0), + f32x4(1.0, 2.0, 3.0, 4.0), + }; + var det: F32x4 = undefined; + const mi = inverseDet(m, &det); + try expect(approxEqAbs(det, splat(F32x4, 2939.0), 0.0001)); + + try expect(approxEqAbs(mi[0], f32x4(-0.170806, -0.13576, -0.349439, 0.164001), 0.0001)); + try expect(approxEqAbs(mi[1], f32x4(-0.163661, -0.14801, -0.253147, 0.141204), 0.0001)); + try expect(approxEqAbs(mi[2], f32x4(-0.0871045, 0.00646478, -0.0785982, 0.0398095), 0.0001)); + try expect(approxEqAbs(mi[3], f32x4(0.18986, 0.103096, 0.272882, 0.10854), 0.0001)); +} + +pub fn 
matFromNormAxisAngle(axis: Vec, angle: f32) Mat { + const sincos_angle = sincos(angle); + + const c2 = splat(F32x4, 1.0 - sincos_angle[1]); + const c1 = splat(F32x4, sincos_angle[1]); + const c0 = splat(F32x4, sincos_angle[0]); + + const n0 = swizzle(axis, .y, .z, .x, .w); + const n1 = swizzle(axis, .z, .x, .y, .w); + + var v0 = c2 * n0 * n1; + const r0 = c2 * axis * axis + c1; + const r1 = c0 * axis + v0; + var r2 = v0 - c0 * axis; + + v0 = andInt(r0, f32x4_mask3); + + var v1 = @shuffle(f32, r1, r2, [4]i32{ 0, 2, ~@as(i32, 1), ~@as(i32, 2) }); + v1 = swizzle(v1, .y, .z, .w, .x); + + var v2 = @shuffle(f32, r1, r2, [4]i32{ 1, 1, ~@as(i32, 0), ~@as(i32, 0) }); + v2 = swizzle(v2, .x, .z, .x, .z); + + r2 = @shuffle(f32, v0, v1, [4]i32{ 0, 3, ~@as(i32, 0), ~@as(i32, 1) }); + r2 = swizzle(r2, .x, .z, .w, .y); + + var m: Mat = undefined; + m[0] = r2; + + r2 = @shuffle(f32, v0, v1, [4]i32{ 1, 3, ~@as(i32, 2), ~@as(i32, 3) }); + r2 = swizzle(r2, .z, .x, .w, .y); + m[1] = r2; + + v2 = @shuffle(f32, v2, v0, [4]i32{ 0, 1, ~@as(i32, 2), ~@as(i32, 3) }); + m[2] = v2; + m[3] = f32x4(0.0, 0.0, 0.0, 1.0); + return m; +} +pub fn matFromAxisAngle(axis: Vec, angle: f32) Mat { + assert(!all(axis == splat(F32x4, 0.0), 3)); + assert(!all(isInf(axis), 3)); + const normal = normalize3(axis); + return matFromNormAxisAngle(normal, angle); +} +test "zmath.matrix.matFromAxisAngle" { + { + const m0 = matFromAxisAngle(f32x4(1.0, 0.0, 0.0, 0.0), math.pi * 0.25); + const m1 = rotationX(math.pi * 0.25); + try expect(approxEqAbs(m0[0], m1[0], 0.001)); + try expect(approxEqAbs(m0[1], m1[1], 0.001)); + try expect(approxEqAbs(m0[2], m1[2], 0.001)); + try expect(approxEqAbs(m0[3], m1[3], 0.001)); + } + { + const m0 = matFromAxisAngle(f32x4(0.0, 1.0, 0.0, 0.0), math.pi * 0.125); + const m1 = rotationY(math.pi * 0.125); + try expect(approxEqAbs(m0[0], m1[0], 0.001)); + try expect(approxEqAbs(m0[1], m1[1], 0.001)); + try expect(approxEqAbs(m0[2], m1[2], 0.001)); + try expect(approxEqAbs(m0[3], m1[3], 
0.001)); + } + { + const m0 = matFromAxisAngle(f32x4(0.0, 0.0, 1.0, 0.0), math.pi * 0.333); + const m1 = rotationZ(math.pi * 0.333); + try expect(approxEqAbs(m0[0], m1[0], 0.001)); + try expect(approxEqAbs(m0[1], m1[1], 0.001)); + try expect(approxEqAbs(m0[2], m1[2], 0.001)); + try expect(approxEqAbs(m0[3], m1[3], 0.001)); + } +} + +pub fn matFromQuat(quat: Quat) Mat { + var q0 = quat + quat; + var q1 = quat * q0; + + var v0 = swizzle(q1, .y, .x, .x, .w); + v0 = andInt(v0, f32x4_mask3); + + var v1 = swizzle(q1, .z, .z, .y, .w); + v1 = andInt(v1, f32x4_mask3); + + var r0 = (f32x4(1.0, 1.0, 1.0, 0.0) - v0) - v1; + + v0 = swizzle(quat, .x, .x, .y, .w); + v1 = swizzle(q0, .z, .y, .z, .w); + v0 = v0 * v1; + + v1 = swizzle(quat, .w, .w, .w, .w); + var v2 = swizzle(q0, .y, .z, .x, .w); + v1 = v1 * v2; + + var r1 = v0 + v1; + var r2 = v0 - v1; + + v0 = @shuffle(f32, r1, r2, [4]i32{ 1, 2, ~@as(i32, 0), ~@as(i32, 1) }); + v0 = swizzle(v0, .x, .z, .w, .y); + v1 = @shuffle(f32, r1, r2, [4]i32{ 0, 0, ~@as(i32, 2), ~@as(i32, 2) }); + v1 = swizzle(v1, .x, .z, .x, .z); + + q1 = @shuffle(f32, r0, v0, [4]i32{ 0, 3, ~@as(i32, 0), ~@as(i32, 1) }); + q1 = swizzle(q1, .x, .z, .w, .y); + + var m: Mat = undefined; + m[0] = q1; + + q1 = @shuffle(f32, r0, v0, [4]i32{ 1, 3, ~@as(i32, 2), ~@as(i32, 3) }); + q1 = swizzle(q1, .z, .x, .w, .y); + m[1] = q1; + + q1 = @shuffle(f32, v1, r0, [4]i32{ 0, 1, ~@as(i32, 2), ~@as(i32, 3) }); + m[2] = q1; + m[3] = f32x4(0.0, 0.0, 0.0, 1.0); + return m; +} +test "zmath.matrix.matFromQuat" { + { + const m = matFromQuat(f32x4(0.0, 0.0, 0.0, 1.0)); + try expect(approxEqAbs(m[0], f32x4(1.0, 0.0, 0.0, 0.0), 0.0001)); + try expect(approxEqAbs(m[1], f32x4(0.0, 1.0, 0.0, 0.0), 0.0001)); + try expect(approxEqAbs(m[2], f32x4(0.0, 0.0, 1.0, 0.0), 0.0001)); + try expect(approxEqAbs(m[3], f32x4(0.0, 0.0, 0.0, 1.0), 0.0001)); + } +} + +pub fn matFromRollPitchYaw(pitch: f32, yaw: f32, roll: f32) Mat { + return matFromRollPitchYawV(f32x4(pitch, yaw, roll, 0.0)); +} +pub fn 
matFromRollPitchYawV(angles: Vec) Mat { + return matFromQuat(quatFromRollPitchYawV(angles)); +} + +pub fn matToQuat(m: Mat) Quat { + return quatFromMat(m); +} + +pub inline fn loadMat(mem: []const f32) Mat { + return .{ + load(mem[0..4], F32x4, 0), + load(mem[4..8], F32x4, 0), + load(mem[8..12], F32x4, 0), + load(mem[12..16], F32x4, 0), + }; +} +test "zmath.loadMat" { + const a = [18]f32{ + 1.0, 2.0, 3.0, 4.0, + 5.0, 6.0, 7.0, 8.0, + 9.0, 10.0, 11.0, 12.0, + 13.0, 14.0, 15.0, 16.0, + 17.0, 18.0, + }; + const m = loadMat(a[1..]); + try expect(approxEqAbs(m[0], f32x4(2.0, 3.0, 4.0, 5.0), 0.0)); + try expect(approxEqAbs(m[1], f32x4(6.0, 7.0, 8.0, 9.0), 0.0)); + try expect(approxEqAbs(m[2], f32x4(10.0, 11.0, 12.0, 13.0), 0.0)); + try expect(approxEqAbs(m[3], f32x4(14.0, 15.0, 16.0, 17.0), 0.0)); +} + +pub inline fn storeMat(mem: []f32, m: Mat) void { + store(mem[0..4], m[0], 0); + store(mem[4..8], m[1], 0); + store(mem[8..12], m[2], 0); + store(mem[12..16], m[3], 0); +} + +pub inline fn loadMat43(mem: []const f32) Mat { + return .{ + f32x4(mem[0], mem[1], mem[2], 0.0), + f32x4(mem[3], mem[4], mem[5], 0.0), + f32x4(mem[6], mem[7], mem[8], 0.0), + f32x4(mem[9], mem[10], mem[11], 1.0), + }; +} + +pub inline fn storeMat43(mem: []f32, m: Mat) void { + store(mem[0..3], m[0], 3); + store(mem[3..6], m[1], 3); + store(mem[6..9], m[2], 3); + store(mem[9..12], m[3], 3); +} + +pub inline fn loadMat34(mem: []const f32) Mat { + return .{ + load(mem[0..4], F32x4, 0), + load(mem[4..8], F32x4, 0), + load(mem[8..12], F32x4, 0), + f32x4(0.0, 0.0, 0.0, 1.0), + }; +} + +pub inline fn storeMat34(mem: []f32, m: Mat) void { + store(mem[0..4], m[0], 0); + store(mem[4..8], m[1], 0); + store(mem[8..12], m[2], 0); +} + +pub inline fn matToArr(m: Mat) [16]f32 { + var array: [16]f32 = undefined; + storeMat(array[0..], m); + return array; +} + +pub inline fn matToArr43(m: Mat) [12]f32 { + var array: [12]f32 = undefined; + storeMat43(array[0..], m); + return array; +} + +pub inline fn matToArr34(m: 
Mat) [12]f32 { + var array: [12]f32 = undefined; + storeMat34(array[0..], m); + return array; +} +// ------------------------------------------------------------------------------ +// +// 5. Quaternion functions +// +// ------------------------------------------------------------------------------ +pub fn qmul(q0: Quat, q1: Quat) Quat { + var result = swizzle(q1, .w, .w, .w, .w); + var q1x = swizzle(q1, .x, .x, .x, .x); + var q1y = swizzle(q1, .y, .y, .y, .y); + var q1z = swizzle(q1, .z, .z, .z, .z); + result = result * q0; + var q0_shuf = swizzle(q0, .w, .z, .y, .x); + q1x = q1x * q0_shuf; + q0_shuf = swizzle(q0_shuf, .y, .x, .w, .z); + result = mulAdd(q1x, f32x4(1.0, -1.0, 1.0, -1.0), result); + q1y = q1y * q0_shuf; + q0_shuf = swizzle(q0_shuf, .w, .z, .y, .x); + q1y = q1y * f32x4(1.0, 1.0, -1.0, -1.0); + q1z = q1z * q0_shuf; + q1y = mulAdd(q1z, f32x4(-1.0, 1.0, 1.0, -1.0), q1y); + return result + q1y; +} +test "zmath.quaternion.mul" { + { + const q0 = f32x4(2.0, 3.0, 4.0, 1.0); + const q1 = f32x4(3.0, 2.0, 1.0, 4.0); + try expect(approxEqAbs(qmul(q0, q1), f32x4(16.0, 4.0, 22.0, -12.0), 0.0001)); + } +} + +pub fn quatToMat(quat: Quat) Mat { + return matFromQuat(quat); +} + +pub fn quatToAxisAngle(quat: Quat, axis: *Vec, angle: *f32) void { + axis.* = quat; + angle.* = 2.0 * acos(quat[3]); +} +test "zmath.quaternion.quatToAxisAngle" { + { + const q0 = quatFromNormAxisAngle(f32x4(1.0, 0.0, 0.0, 0.0), 0.25 * math.pi); + var axis: Vec = f32x4(4.0, 3.0, 2.0, 1.0); + var angle: f32 = 10.0; + quatToAxisAngle(q0, &axis, &angle); + try expect(math.approxEqAbs(f32, axis[0], @sin(@as(f32, 0.25) * math.pi * 0.5), 0.0001)); + try expect(axis[1] == 0.0); + try expect(axis[2] == 0.0); + try expect(math.approxEqAbs(f32, angle, 0.25 * math.pi, 0.0001)); + } +} + +pub fn quatFromMat(m: Mat) Quat { + const r0 = m[0]; + const r1 = m[1]; + const r2 = m[2]; + const r00 = swizzle(r0, .x, .x, .x, .x); + const r11 = swizzle(r1, .y, .y, .y, .y); + const r22 = swizzle(r2, .z, .z, .z, .z); 
+ + const x2gey2 = (r11 - r00) <= splat(F32x4, 0.0); + const z2gew2 = (r11 + r00) <= splat(F32x4, 0.0); + const x2py2gez2pw2 = r22 <= splat(F32x4, 0.0); + + var t0 = mulAdd(r00, f32x4(1.0, -1.0, -1.0, 1.0), splat(F32x4, 1.0)); + var t1 = r11 * f32x4(-1.0, 1.0, -1.0, 1.0); + var t2 = mulAdd(r22, f32x4(-1.0, -1.0, 1.0, 1.0), t0); + const x2y2z2w2 = t1 + t2; + + t0 = @shuffle(f32, r0, r1, [4]i32{ 1, 2, ~@as(i32, 2), ~@as(i32, 1) }); + t1 = @shuffle(f32, r1, r2, [4]i32{ 0, 0, ~@as(i32, 0), ~@as(i32, 1) }); + t1 = swizzle(t1, .x, .z, .w, .y); + const xyxzyz = t0 + t1; + + t0 = @shuffle(f32, r2, r1, [4]i32{ 1, 0, ~@as(i32, 0), ~@as(i32, 0) }); + t1 = @shuffle(f32, r1, r0, [4]i32{ 2, 2, ~@as(i32, 2), ~@as(i32, 1) }); + t1 = swizzle(t1, .x, .z, .w, .y); + const xwywzw = (t0 - t1) * f32x4(-1.0, 1.0, -1.0, 1.0); + + t0 = @shuffle(f32, x2y2z2w2, xyxzyz, [4]i32{ 0, 1, ~@as(i32, 0), ~@as(i32, 0) }); + t1 = @shuffle(f32, x2y2z2w2, xwywzw, [4]i32{ 2, 3, ~@as(i32, 2), ~@as(i32, 0) }); + t2 = @shuffle(f32, xyxzyz, xwywzw, [4]i32{ 1, 2, ~@as(i32, 0), ~@as(i32, 1) }); + + const tensor0 = @shuffle(f32, t0, t2, [4]i32{ 0, 2, ~@as(i32, 0), ~@as(i32, 2) }); + const tensor1 = @shuffle(f32, t0, t2, [4]i32{ 2, 1, ~@as(i32, 1), ~@as(i32, 3) }); + const tensor2 = @shuffle(f32, t2, t1, [4]i32{ 0, 1, ~@as(i32, 0), ~@as(i32, 2) }); + const tensor3 = @shuffle(f32, t2, t1, [4]i32{ 2, 3, ~@as(i32, 2), ~@as(i32, 1) }); + + t0 = select(x2gey2, tensor0, tensor1); + t1 = select(z2gew2, tensor2, tensor3); + t2 = select(x2py2gez2pw2, t0, t1); + + return t2 / length4(t2); +} +test "zmath.quatFromMat" { + { + const q0 = quatFromAxisAngle(f32x4(1.0, 0.0, 0.0, 0.0), 0.25 * math.pi); + const q1 = quatFromMat(rotationX(0.25 * math.pi)); + try expect(approxEqAbs(q0, q1, 0.0001)); + } + { + const q0 = quatFromAxisAngle(f32x4(1.0, 2.0, 0.5, 0.0), 0.25 * math.pi); + const q1 = quatFromMat(matFromAxisAngle(f32x4(1.0, 2.0, 0.5, 0.0), 0.25 * math.pi)); + try expect(approxEqAbs(q0, q1, 0.0001)); + } + { + const q0 = 
quatFromRollPitchYaw(0.1 * math.pi, -0.2 * math.pi, 0.3 * math.pi); + const q1 = quatFromMat(matFromRollPitchYaw(0.1 * math.pi, -0.2 * math.pi, 0.3 * math.pi)); + try expect(approxEqAbs(q0, q1, 0.0001)); + } +} + +pub fn quatFromNormAxisAngle(axis: Vec, angle: f32) Quat { + var n = f32x4(axis[0], axis[1], axis[2], 1.0); + const sc = sincos(0.5 * angle); + return n * f32x4(sc[0], sc[0], sc[0], sc[1]); +} +pub fn quatFromAxisAngle(axis: Vec, angle: f32) Quat { + assert(!all(axis == splat(F32x4, 0.0), 3)); + assert(!all(isInf(axis), 3)); + const normal = normalize3(axis); + return quatFromNormAxisAngle(normal, angle); +} +test "zmath.quaternion.quatFromNormAxisAngle" { + { + const q0 = quatFromAxisAngle(f32x4(1.0, 0.0, 0.0, 0.0), 0.25 * math.pi); + const q1 = quatFromAxisAngle(f32x4(0.0, 1.0, 0.0, 0.0), 0.125 * math.pi); + const m0 = rotationX(0.25 * math.pi); + const m1 = rotationY(0.125 * math.pi); + const mr0 = quatToMat(qmul(q0, q1)); + const mr1 = mul(m0, m1); + try expect(approxEqAbs(mr0[0], mr1[0], 0.0001)); + try expect(approxEqAbs(mr0[1], mr1[1], 0.0001)); + try expect(approxEqAbs(mr0[2], mr1[2], 0.0001)); + try expect(approxEqAbs(mr0[3], mr1[3], 0.0001)); + } + { + const m0 = quatToMat(quatFromAxisAngle(f32x4(1.0, 2.0, 0.5, 0.0), 0.25 * math.pi)); + const m1 = matFromAxisAngle(f32x4(1.0, 2.0, 0.5, 0.0), 0.25 * math.pi); + try expect(approxEqAbs(m0[0], m1[0], 0.0001)); + try expect(approxEqAbs(m0[1], m1[1], 0.0001)); + try expect(approxEqAbs(m0[2], m1[2], 0.0001)); + try expect(approxEqAbs(m0[3], m1[3], 0.0001)); + } +} + +pub inline fn qidentity() Quat { + return f32x4(@as(f32, 0.0), @as(f32, 0.0), @as(f32, 0.0), @as(f32, 1.0)); +} + +pub inline fn conjugate(quat: Quat) Quat { + return quat * f32x4(-1.0, -1.0, -1.0, 1.0); +} + +fn inverseQuat(quat: Quat) Quat { + const l = lengthSq4(quat); + const conj = conjugate(quat); + return select(l <= splat(F32x4, math.f32_epsilon), splat(F32x4, 0.0), conj / l); +} +test "zmath.quaternion.inverseQuat" { + try 
expect(approxEqAbs( + inverse(f32x4(2.0, 3.0, 4.0, 1.0)), + f32x4(-1.0 / 15.0, -1.0 / 10.0, -2.0 / 15.0, 1.0 / 30.0), + 0.0001, + )); + try expect(approxEqAbs(inverse(qidentity()), qidentity(), 0.0001)); +} + +pub fn slerp(q0: Quat, q1: Quat, t: f32) Quat { + return slerpV(q0, q1, splat(F32x4, t)); +} +pub fn slerpV(q0: Quat, q1: Quat, t: F32x4) Quat { + var cos_omega = dot4(q0, q1); + const sign = select(cos_omega < splat(F32x4, 0.0), splat(F32x4, -1.0), splat(F32x4, 1.0)); + + cos_omega = cos_omega * sign; + const sin_omega = sqrt(splat(F32x4, 1.0) - cos_omega * cos_omega); + + const omega = atan2(sin_omega, cos_omega); + + var v01 = t; + v01 = xorInt(andInt(v01, f32x4_mask2), f32x4_sign_mask1); + v01 = f32x4(1.0, 0.0, 0.0, 0.0) + v01; + + var s0 = sin(v01 * omega) / sin_omega; + s0 = select(cos_omega < splat(F32x4, 1.0 - 0.00001), s0, v01); + + var s1 = swizzle(s0, .y, .y, .y, .y); + s0 = swizzle(s0, .x, .x, .x, .x); + + return q0 * s0 + sign * q1 * s1; +} +test "zmath.quaternion.slerp" { + const from = f32x4(0.0, 0.0, 0.0, 1.0); + const to = f32x4(0.5, 0.5, -0.5, 0.5); + const result = slerp(from, to, 0.5); + try expect(approxEqAbs(result, f32x4(0.28867513, 0.28867513, -0.28867513, 0.86602540), 0.0001)); +} + +pub fn quatFromRollPitchYaw(pitch: f32, yaw: f32, roll: f32) Quat { + return quatFromRollPitchYawV(f32x4(pitch, yaw, roll, 0.0)); +} +pub fn quatFromRollPitchYawV(angles: Vec) Quat { // | pitch | yaw | roll | 0 | + const sc = sincos(splat(Vec, 0.5) * angles); + const p0 = @shuffle(f32, sc[1], sc[0], [4]i32{ ~@as(i32, 0), 0, 0, 0 }); + const p1 = @shuffle(f32, sc[0], sc[1], [4]i32{ ~@as(i32, 0), 0, 0, 0 }); + const y0 = @shuffle(f32, sc[1], sc[0], [4]i32{ 1, ~@as(i32, 1), 1, 1 }); + const y1 = @shuffle(f32, sc[0], sc[1], [4]i32{ 1, ~@as(i32, 1), 1, 1 }); + const r0 = @shuffle(f32, sc[1], sc[0], [4]i32{ 2, 2, ~@as(i32, 2), 2 }); + const r1 = @shuffle(f32, sc[0], sc[1], [4]i32{ 2, 2, ~@as(i32, 2), 2 }); + const q1 = p1 * f32x4(1.0, -1.0, -1.0, 1.0) * y1; + 
const q0 = p0 * y0 * r0; + return mulAdd(q1, r1, q0); +} +test "zmath.quaternion.quatFromRollPitchYawV" { + { + const m0 = quatToMat(quatFromRollPitchYawV(f32x4(0.25 * math.pi, 0.0, 0.0, 0.0))); + const m1 = rotationX(0.25 * math.pi); + try expect(approxEqAbs(m0[0], m1[0], 0.0001)); + try expect(approxEqAbs(m0[1], m1[1], 0.0001)); + try expect(approxEqAbs(m0[2], m1[2], 0.0001)); + try expect(approxEqAbs(m0[3], m1[3], 0.0001)); + } + { + const m0 = quatToMat(quatFromRollPitchYaw(0.1 * math.pi, 0.2 * math.pi, 0.3 * math.pi)); + const m1 = mul( + rotationZ(0.3 * math.pi), + mul(rotationX(0.1 * math.pi), rotationY(0.2 * math.pi)), + ); + try expect(approxEqAbs(m0[0], m1[0], 0.0001)); + try expect(approxEqAbs(m0[1], m1[1], 0.0001)); + try expect(approxEqAbs(m0[2], m1[2], 0.0001)); + try expect(approxEqAbs(m0[3], m1[3], 0.0001)); + } +} +// ------------------------------------------------------------------------------ +// +// 6. Color functions +// +// ------------------------------------------------------------------------------ +pub fn adjustSaturation(color: F32x4, saturation: f32) F32x4 { + const luminance = dot3(f32x4(0.2125, 0.7154, 0.0721, 0.0), color); + var result = mulAdd(color - luminance, f32x4s(saturation), luminance); + result[3] = color[3]; + return result; +} + +pub fn adjustContrast(color: F32x4, contrast: f32) F32x4 { + var result = mulAdd(color - f32x4s(0.5), f32x4s(contrast), f32x4s(0.5)); + result[3] = color[3]; + return result; +} + +pub fn rgbToHsl(rgb: F32x4) F32x4 { + const r = swizzle(rgb, .x, .x, .x, .x); + const g = swizzle(rgb, .y, .y, .y, .y); + const b = swizzle(rgb, .z, .z, .z, .z); + + const minv = min(r, min(g, b)); + const maxv = max(r, max(g, b)); + + const l = (minv + maxv) * f32x4s(0.5); + const d = maxv - minv; + const la = select(boolx4(true, true, true, false), l, rgb); + + if (all(d < f32x4s(math.f32_epsilon), 3)) { + return select(boolx4(true, true, false, false), f32x4s(0.0), la); + } else { + var s: F32x4 = undefined; + var h: 
F32x4 = undefined; + + const d2 = minv + maxv; + + if (all(l > f32x4s(0.5), 3)) { + s = d / (f32x4s(2.0) - d2); + } else { + s = d / d2; + } + + if (all(r == maxv, 3)) { + h = (g - b) / d; + } else if (all(g == maxv, 3)) { + h = f32x4s(2.0) + (b - r) / d; + } else { + h = f32x4s(4.0) + (r - g) / d; + } + + h /= f32x4s(6.0); + + if (all(h < f32x4s(0.0), 3)) { + h += f32x4s(1.0); + } + + const lha = select(boolx4(true, true, false, false), h, la); + return select(boolx4(true, false, true, true), lha, s); + } +} +test "zmath.color.rgbToHsl" { + try expect(approxEqAbs(rgbToHsl(f32x4(0.2, 0.4, 0.8, 1.0)), f32x4(0.6111, 0.6, 0.5, 1.0), 0.0001)); + try expect(approxEqAbs(rgbToHsl(f32x4(1.0, 0.0, 0.0, 0.5)), f32x4(0.0, 1.0, 0.5, 0.5), 0.0001)); + try expect(approxEqAbs(rgbToHsl(f32x4(0.0, 1.0, 0.0, 0.25)), f32x4(0.3333, 1.0, 0.5, 0.25), 0.0001)); + try expect(approxEqAbs(rgbToHsl(f32x4(0.0, 0.0, 1.0, 1.0)), f32x4(0.6666, 1.0, 0.5, 1.0), 0.0001)); + try expect(approxEqAbs(rgbToHsl(f32x4(0.0, 0.0, 0.0, 1.0)), f32x4(0.0, 0.0, 0.0, 1.0), 0.0001)); + try expect(approxEqAbs(rgbToHsl(f32x4(1.0, 1.0, 1.0, 1.0)), f32x4(0.0, 0.0, 1.0, 1.0), 0.0001)); +} + +fn hueToClr(p: F32x4, q: F32x4, h: F32x4) F32x4 { + var t = h; + + if (all(t < f32x4s(0.0), 3)) + t += f32x4s(1.0); + + if (all(t > f32x4s(1.0), 3)) + t -= f32x4s(1.0); + + if (all(t < f32x4s(1.0 / 6.0), 3)) + return mulAdd(q - p, f32x4s(6.0) * t, p); + + if (all(t < f32x4s(0.5), 3)) + return q; + + if (all(t < f32x4s(2.0 / 3.0), 3)) + return mulAdd(q - p, f32x4s(6.0) * (f32x4s(2.0 / 3.0) - t), p); + + return p; +} + +pub fn hslToRgb(hsl: F32x4) F32x4 { + const s = swizzle(hsl, .y, .y, .y, .y); + const l = swizzle(hsl, .z, .z, .z, .z); + + if (all(isNearEqual(s, f32x4s(0.0), f32x4s(math.f32_epsilon)), 3)) { + return select(boolx4(true, true, true, false), l, hsl); + } else { + const h = swizzle(hsl, .x, .x, .x, .x); + var q: F32x4 = undefined; + if (all(l < f32x4s(0.5), 3)) { + q = l * (f32x4s(1.0) + s); + } else { + q = (l + s) - 
(l * s); + } + + const p = f32x4s(2.0) * l - q; + + const r = hueToClr(p, q, h + f32x4s(1.0 / 3.0)); + const g = hueToClr(p, q, h); + const b = hueToClr(p, q, h - f32x4s(1.0 / 3.0)); + + const rg = select(boolx4(true, false, false, false), r, g); + const ba = select(boolx4(true, true, true, false), b, hsl); + return select(boolx4(true, true, false, false), rg, ba); + } +} +test "zmath.color.hslToRgb" { + try expect(approxEqAbs(f32x4(0.2, 0.4, 0.8, 1.0), hslToRgb(f32x4(0.6111, 0.6, 0.5, 1.0)), 0.0001)); + try expect(approxEqAbs(f32x4(1.0, 0.0, 0.0, 0.5), hslToRgb(f32x4(0.0, 1.0, 0.5, 0.5)), 0.0001)); + try expect(approxEqAbs(f32x4(0.0, 1.0, 0.0, 0.25), hslToRgb(f32x4(0.3333, 1.0, 0.5, 0.25)), 0.0005)); + try expect(approxEqAbs(f32x4(0.0, 0.0, 1.0, 1.0), hslToRgb(f32x4(0.6666, 1.0, 0.5, 1.0)), 0.0005)); + try expect(approxEqAbs(f32x4(0.0, 0.0, 0.0, 1.0), hslToRgb(f32x4(0.0, 0.0, 0.0, 1.0)), 0.0001)); + try expect(approxEqAbs(f32x4(1.0, 1.0, 1.0, 1.0), hslToRgb(f32x4(0.0, 0.0, 1.0, 1.0)), 0.0001)); + try expect(approxEqAbs(hslToRgb(rgbToHsl(f32x4(1.0, 1.0, 1.0, 1.0))), f32x4(1.0, 1.0, 1.0, 1.0), 0.0005)); + try expect(approxEqAbs( + hslToRgb(rgbToHsl(f32x4(0.82198, 0.1839, 0.632, 1.0))), + f32x4(0.82198, 0.1839, 0.632, 1.0), + 0.0005, + )); + try expect(approxEqAbs( + rgbToHsl(hslToRgb(f32x4(0.82198, 0.1839, 0.632, 1.0))), + f32x4(0.82198, 0.1839, 0.632, 1.0), + 0.0005, + )); + try expect(approxEqAbs( + rgbToHsl(hslToRgb(f32x4(0.1839, 0.82198, 0.632, 1.0))), + f32x4(0.1839, 0.82198, 0.632, 1.0), + 0.0005, + )); + try expect(approxEqAbs( + hslToRgb(rgbToHsl(f32x4(0.1839, 0.632, 0.82198, 1.0))), + f32x4(0.1839, 0.632, 0.82198, 1.0), + 0.0005, + )); +} + +pub fn rgbToHsv(rgb: F32x4) F32x4 { + const r = swizzle(rgb, .x, .x, .x, .x); + const g = swizzle(rgb, .y, .y, .y, .y); + const b = swizzle(rgb, .z, .z, .z, .z); + + const minv = min(r, min(g, b)); + const v = max(r, max(g, b)); + const d = v - minv; + const s = if (all(isNearEqual(v, f32x4s(0.0), 
f32x4s(math.f32_epsilon)), 3)) f32x4s(0.0) else d / v; + + if (all(d < f32x4s(math.f32_epsilon), 3)) { + const hv = select(boolx4(true, false, false, false), f32x4s(0.0), v); + const hva = select(boolx4(true, true, true, false), hv, rgb); + return select(boolx4(true, false, true, true), hva, s); + } else { + var h: F32x4 = undefined; + if (all(r == v, 3)) { + h = (g - b) / d; + if (all(g < b, 3)) + h += f32x4s(6.0); + } else if (all(g == v, 3)) { + h = f32x4s(2.0) + (b - r) / d; + } else { + h = f32x4s(4.0) + (r - g) / d; + } + + h /= f32x4s(6.0); + const hv = select(boolx4(true, false, false, false), h, v); + const hva = select(boolx4(true, true, true, false), hv, rgb); + return select(boolx4(true, false, true, true), hva, s); + } +} +test "zmath.color.rgbToHsv" { + try expect(approxEqAbs(rgbToHsv(f32x4(0.2, 0.4, 0.8, 1.0)), f32x4(0.6111, 0.75, 0.8, 1.0), 0.0001)); + try expect(approxEqAbs(rgbToHsv(f32x4(0.4, 0.2, 0.8, 1.0)), f32x4(0.7222, 0.75, 0.8, 1.0), 0.0001)); + try expect(approxEqAbs(rgbToHsv(f32x4(0.4, 0.8, 0.2, 1.0)), f32x4(0.2777, 0.75, 0.8, 1.0), 0.0001)); + try expect(approxEqAbs(rgbToHsv(f32x4(1.0, 0.0, 0.0, 0.5)), f32x4(0.0, 1.0, 1.0, 0.5), 0.0001)); + try expect(approxEqAbs(rgbToHsv(f32x4(0.0, 1.0, 0.0, 0.25)), f32x4(0.3333, 1.0, 1.0, 0.25), 0.0001)); + try expect(approxEqAbs(rgbToHsv(f32x4(0.0, 0.0, 1.0, 1.0)), f32x4(0.6666, 1.0, 1.0, 1.0), 0.0001)); + try expect(approxEqAbs(rgbToHsv(f32x4(0.0, 0.0, 0.0, 1.0)), f32x4(0.0, 0.0, 0.0, 1.0), 0.0001)); + try expect(approxEqAbs(rgbToHsv(f32x4(1.0, 1.0, 1.0, 1.0)), f32x4(0.0, 0.0, 1.0, 1.0), 0.0001)); +} + +pub fn hsvToRgb(hsv: F32x4) F32x4 { + const h = swizzle(hsv, .x, .x, .x, .x); + const s = swizzle(hsv, .y, .y, .y, .y); + const v = swizzle(hsv, .z, .z, .z, .z); + + const h6 = h * f32x4s(6.0); + const i = floor(h6); + const f = h6 - i; + + const p = v * (f32x4s(1.0) - s); + const q = v * (f32x4s(1.0) - f * s); + const t = v * (f32x4s(1.0) - (f32x4s(1.0) - f) * s); + + const ii = @floatToInt(i32, 
mod(i, f32x4s(6.0))[0]); + const rgb = switch (ii) { + 0 => blk: { + const vt = select(boolx4(true, false, false, false), v, t); + break :blk select(boolx4(true, true, false, false), vt, p); + }, + 1 => blk: { + const qv = select(boolx4(true, false, false, false), q, v); + break :blk select(boolx4(true, true, false, false), qv, p); + }, + 2 => blk: { + const pv = select(boolx4(true, false, false, false), p, v); + break :blk select(boolx4(true, true, false, false), pv, t); + }, + 3 => blk: { + const pq = select(boolx4(true, false, false, false), p, q); + break :blk select(boolx4(true, true, false, false), pq, v); + }, + 4 => blk: { + const tp = select(boolx4(true, false, false, false), t, p); + break :blk select(boolx4(true, true, false, false), tp, v); + }, + 5 => blk: { + const vp = select(boolx4(true, false, false, false), v, p); + break :blk select(boolx4(true, true, false, false), vp, q); + }, + else => unreachable, + }; + return select(boolx4(true, true, true, false), rgb, hsv); +} +test "zmath.color.hsvToRgb" { + const epsilon = 0.0005; + try expect(approxEqAbs(f32x4(0.2, 0.4, 0.8, 1.0), hsvToRgb(f32x4(0.6111, 0.75, 0.8, 1.0)), epsilon)); + try expect(approxEqAbs(f32x4(0.4, 0.2, 0.8, 1.0), hsvToRgb(f32x4(0.7222, 0.75, 0.8, 1.0)), epsilon)); + try expect(approxEqAbs(f32x4(0.4, 0.8, 0.2, 1.0), hsvToRgb(f32x4(0.2777, 0.75, 0.8, 1.0)), epsilon)); + try expect(approxEqAbs(f32x4(1.0, 0.0, 0.0, 0.5), hsvToRgb(f32x4(0.0, 1.0, 1.0, 0.5)), epsilon)); + try expect(approxEqAbs(f32x4(0.0, 1.0, 0.0, 0.25), hsvToRgb(f32x4(0.3333, 1.0, 1.0, 0.25)), epsilon)); + try expect(approxEqAbs(f32x4(0.0, 0.0, 1.0, 1.0), hsvToRgb(f32x4(0.6666, 1.0, 1.0, 1.0)), epsilon)); + try expect(approxEqAbs(f32x4(0.0, 0.0, 0.0, 1.0), hsvToRgb(f32x4(0.0, 0.0, 0.0, 1.0)), epsilon)); + try expect(approxEqAbs(f32x4(1.0, 1.0, 1.0, 1.0), hsvToRgb(f32x4(0.0, 0.0, 1.0, 1.0)), epsilon)); + try expect(approxEqAbs( + hsvToRgb(rgbToHsv(f32x4(0.1839, 0.632, 0.82198, 1.0))), + f32x4(0.1839, 0.632, 0.82198, 
1.0), + epsilon, + )); + try expect(approxEqAbs( + hsvToRgb(rgbToHsv(f32x4(0.82198, 0.1839, 0.632, 1.0))), + f32x4(0.82198, 0.1839, 0.632, 1.0), + epsilon, + )); + try expect(approxEqAbs( + rgbToHsv(hsvToRgb(f32x4(0.82198, 0.1839, 0.632, 1.0))), + f32x4(0.82198, 0.1839, 0.632, 1.0), + epsilon, + )); + try expect(approxEqAbs( + rgbToHsv(hsvToRgb(f32x4(0.1839, 0.82198, 0.632, 1.0))), + f32x4(0.1839, 0.82198, 0.632, 1.0), + epsilon, + )); +} + +pub fn rgbToSrgb(rgb: F32x4) F32x4 { + const static = struct { + const cutoff = f32x4(0.0031308, 0.0031308, 0.0031308, 1.0); + const linear = f32x4(12.92, 12.92, 12.92, 1.0); + const scale = f32x4(1.055, 1.055, 1.055, 1.0); + const bias = f32x4(0.055, 0.055, 0.055, 1.0); + const rgamma = 1.0 / 2.4; + }; + var v = saturate(rgb); + const v0 = v * static.linear; + const v1 = static.scale * f32x4( + math.pow(f32, v[0], static.rgamma), + math.pow(f32, v[1], static.rgamma), + math.pow(f32, v[2], static.rgamma), + v[3], + ) - static.bias; + v = select(v < static.cutoff, v0, v1); + return select(boolx4(true, true, true, false), v, rgb); +} +test "zmath.color.rgbToSrgb" { + const epsilon = 0.001; + try expect(approxEqAbs(rgbToSrgb(f32x4(0.2, 0.4, 0.8, 1.0)), f32x4(0.484, 0.665, 0.906, 1.0), epsilon)); +} + +pub fn srgbToRgb(srgb: F32x4) F32x4 { + const static = struct { + const cutoff = f32x4(0.04045, 0.04045, 0.04045, 1.0); + const rlinear = f32x4(1.0 / 12.92, 1.0 / 12.92, 1.0 / 12.92, 1.0); + const scale = f32x4(1.0 / 1.055, 1.0 / 1.055, 1.0 / 1.055, 1.0); + const bias = f32x4(0.055, 0.055, 0.055, 1.0); + const gamma = 2.4; + }; + var v = saturate(srgb); + const v0 = v * static.rlinear; + var v1 = static.scale * (v + static.bias); + v1 = f32x4( + math.pow(f32, v1[0], static.gamma), + math.pow(f32, v1[1], static.gamma), + math.pow(f32, v1[2], static.gamma), + v1[3], + ); + v = select(v > static.cutoff, v1, v0); + return select(boolx4(true, true, true, false), v, srgb); +} +test "zmath.color.srgbToRgb" { + const epsilon = 0.0007; + try 
expect(approxEqAbs(f32x4(0.2, 0.4, 0.8, 1.0), srgbToRgb(f32x4(0.484, 0.665, 0.906, 1.0)), epsilon)); + try expect(approxEqAbs( + rgbToSrgb(srgbToRgb(f32x4(0.1839, 0.82198, 0.632, 1.0))), + f32x4(0.1839, 0.82198, 0.632, 1.0), + epsilon, + )); +} +// ------------------------------------------------------------------------------ +// +// X. Misc functions +// +// ------------------------------------------------------------------------------ +pub fn linePointDistance(linept0: Vec, linept1: Vec, pt: Vec) F32x4 { + const ptvec = pt - linept0; + const linevec = linept1 - linept0; + const scale = dot3(ptvec, linevec) / lengthSq3(linevec); + return length3(ptvec - linevec * scale); +} +test "zmath.linePointDistance" { + { + const linept0 = f32x4(-1.0, -2.0, -3.0, 1.0); + const linept1 = f32x4(1.0, 2.0, 3.0, 1.0); + const pt = f32x4(1.0, 1.0, 1.0, 1.0); + var v = linePointDistance(linept0, linept1, pt); + try expect(approxEqAbs(v, splat(F32x4, 0.654), 0.001)); + } +} + +fn sin32(v: f32) f32 { + var y = v - math.tau * @round(v * 1.0 / math.tau); + + if (y > 0.5 * math.pi) { + y = math.pi - y; + } else if (y < -math.pi * 0.5) { + y = -math.pi - y; + } + const y2 = y * y; + + // 11-degree minimax approximation + var sinv = mulAdd(@as(f32, -2.3889859e-08), y2, 2.7525562e-06); + sinv = mulAdd(sinv, y2, -0.00019840874); + sinv = mulAdd(sinv, y2, 0.0083333310); + sinv = mulAdd(sinv, y2, -0.16666667); + return y * mulAdd(sinv, y2, 1.0); +} +fn cos32(v: f32) f32 { + var y = v - math.tau * @round(v * 1.0 / math.tau); + + const sign = blk: { + if (y > 0.5 * math.pi) { + y = math.pi - y; + break :blk @as(f32, -1.0); + } else if (y < -math.pi * 0.5) { + y = -math.pi - y; + break :blk @as(f32, -1.0); + } else { + break :blk @as(f32, 1.0); + } + }; + const y2 = y * y; + + // 10-degree minimax approximation + var cosv = mulAdd(@as(f32, -2.6051615e-07), y2, 2.4760495e-05); + cosv = mulAdd(cosv, y2, -0.0013888378); + cosv = mulAdd(cosv, y2, 0.041666638); + cosv = mulAdd(cosv, y2, -0.5); + 
return sign * mulAdd(cosv, y2, 1.0); +} +fn sincos32(v: f32) [2]f32 { + var y = v - math.tau * @round(v * 1.0 / math.tau); + + const sign = blk: { + if (y > 0.5 * math.pi) { + y = math.pi - y; + break :blk @as(f32, -1.0); + } else if (y < -math.pi * 0.5) { + y = -math.pi - y; + break :blk @as(f32, -1.0); + } else { + break :blk @as(f32, 1.0); + } + }; + const y2 = y * y; + + // 11-degree minimax approximation + var sinv = mulAdd(@as(f32, -2.3889859e-08), y2, 2.7525562e-06); + sinv = mulAdd(sinv, y2, -0.00019840874); + sinv = mulAdd(sinv, y2, 0.0083333310); + sinv = mulAdd(sinv, y2, -0.16666667); + sinv = y * mulAdd(sinv, y2, 1.0); + + // 10-degree minimax approximation + var cosv = mulAdd(@as(f32, -2.6051615e-07), y2, 2.4760495e-05); + cosv = mulAdd(cosv, y2, -0.0013888378); + cosv = mulAdd(cosv, y2, 0.041666638); + cosv = mulAdd(cosv, y2, -0.5); + cosv = sign * mulAdd(cosv, y2, 1.0); + + return .{ sinv, cosv }; +} +test "zmath.sincos32" { + const epsilon = 0.0001; + + try expect(math.isNan(sincos32(math.inf_f32)[0])); + try expect(math.isNan(sincos32(math.inf_f32)[1])); + try expect(math.isNan(sincos32(-math.inf_f32)[0])); + try expect(math.isNan(sincos32(-math.inf_f32)[1])); + try expect(math.isNan(sincos32(math.nan_f32)[0])); + try expect(math.isNan(sincos32(-math.nan_f32)[1])); + + try expect(math.isNan(sin32(math.inf_f32))); + try expect(math.isNan(cos32(math.inf_f32))); + try expect(math.isNan(sin32(-math.inf_f32))); + try expect(math.isNan(cos32(-math.inf_f32))); + try expect(math.isNan(sin32(math.nan_f32))); + try expect(math.isNan(cos32(-math.nan_f32))); + + var f: f32 = -100.0; + var i: u32 = 0; + while (i < 100) : (i += 1) { + const sc = sincos32(f); + const s0 = sin32(f); + const c0 = cos32(f); + const s = @sin(f); + const c = @cos(f); + try expect(math.approxEqAbs(f32, sc[0], s, epsilon)); + try expect(math.approxEqAbs(f32, sc[1], c, epsilon)); + try expect(math.approxEqAbs(f32, s0, s, epsilon)); + try expect(math.approxEqAbs(f32, c0, c, epsilon)); + f 
+= 0.12345 * @intToFloat(f32, i); + } +} + +fn asin32(v: f32) f32 { + const x = @fabs(v); + var omx = 1.0 - x; + if (omx < 0.0) { + omx = 0.0; + } + const root = @sqrt(omx); + + // 7-degree minimax approximation + var result = mulAdd(@as(f32, -0.0012624911), x, 0.0066700901); + result = mulAdd(result, x, -0.0170881256); + result = mulAdd(result, x, 0.0308918810); + result = mulAdd(result, x, -0.0501743046); + result = mulAdd(result, x, 0.0889789874); + result = mulAdd(result, x, -0.2145988016); + result = root * mulAdd(result, x, 1.5707963050); + + return if (v >= 0.0) 0.5 * math.pi - result else result - 0.5 * math.pi; +} +test "zmath.asin32" { + const epsilon = 0.0001; + + try expect(math.approxEqAbs(f32, asin(@as(f32, -1.1)), -0.5 * math.pi, epsilon)); + try expect(math.approxEqAbs(f32, asin(@as(f32, 1.1)), 0.5 * math.pi, epsilon)); + try expect(math.approxEqAbs(f32, asin(@as(f32, -1000.1)), -0.5 * math.pi, epsilon)); + try expect(math.approxEqAbs(f32, asin(@as(f32, 100000.1)), 0.5 * math.pi, epsilon)); + try expect(math.isNan(asin(math.inf_f32))); + try expect(math.isNan(asin(-math.inf_f32))); + try expect(math.isNan(asin(math.nan_f32))); + try expect(math.isNan(asin(-math.nan_f32))); + + try expect(approxEqAbs(asin(splat(F32x8, -100.0)), splat(F32x8, -0.5 * math.pi), epsilon)); + try expect(approxEqAbs(asin(splat(F32x16, 100.0)), splat(F32x16, 0.5 * math.pi), epsilon)); + try expect(all(isNan(asin(splat(F32x4, math.inf_f32))), 0) == true); + try expect(all(isNan(asin(splat(F32x4, -math.inf_f32))), 0) == true); + try expect(all(isNan(asin(splat(F32x4, math.nan_f32))), 0) == true); + try expect(all(isNan(asin(splat(F32x4, math.qnan_f32))), 0) == true); + + var f: f32 = -1.0; + var i: u32 = 0; + while (i < 8) : (i += 1) { + const r0 = asin32(f); + const r1 = math.asin(f); + const r4 = asin(splat(F32x4, f)); + const r8 = asin(splat(F32x8, f)); + const r16 = asin(splat(F32x16, f)); + try expect(math.approxEqAbs(f32, r0, r1, epsilon)); + try expect(approxEqAbs(r4, 
splat(F32x4, r1), epsilon)); + try expect(approxEqAbs(r8, splat(F32x8, r1), epsilon)); + try expect(approxEqAbs(r16, splat(F32x16, r1), epsilon)); + f += 0.09 * @intToFloat(f32, i); + } +} + +fn acos32(v: f32) f32 { + const x = @fabs(v); + var omx = 1.0 - x; + if (omx < 0.0) { + omx = 0.0; + } + const root = @sqrt(omx); + + // 7-degree minimax approximation + var result = mulAdd(@as(f32, -0.0012624911), x, 0.0066700901); + result = mulAdd(result, x, -0.0170881256); + result = mulAdd(result, x, 0.0308918810); + result = mulAdd(result, x, -0.0501743046); + result = mulAdd(result, x, 0.0889789874); + result = mulAdd(result, x, -0.2145988016); + result = root * mulAdd(result, x, 1.5707963050); + + return if (v >= 0.0) result else math.pi - result; +} +test "zmath.acos32" { + const epsilon = 0.1; + + try expect(math.approxEqAbs(f32, acos(@as(f32, -1.1)), math.pi, epsilon)); + try expect(math.approxEqAbs(f32, acos(@as(f32, -10000.1)), math.pi, epsilon)); + try expect(math.approxEqAbs(f32, acos(@as(f32, 1.1)), 0.0, epsilon)); + try expect(math.approxEqAbs(f32, acos(@as(f32, 1000.1)), 0.0, epsilon)); + try expect(math.isNan(acos(math.inf_f32))); + try expect(math.isNan(acos(-math.inf_f32))); + try expect(math.isNan(acos(math.nan_f32))); + try expect(math.isNan(acos(-math.nan_f32))); + + try expect(approxEqAbs(acos(splat(F32x8, -100.0)), splat(F32x8, math.pi), epsilon)); + try expect(approxEqAbs(acos(splat(F32x16, 100.0)), splat(F32x16, 0.0), epsilon)); + try expect(all(isNan(acos(splat(F32x4, math.inf_f32))), 0) == true); + try expect(all(isNan(acos(splat(F32x4, -math.inf_f32))), 0) == true); + try expect(all(isNan(acos(splat(F32x4, math.nan_f32))), 0) == true); + try expect(all(isNan(acos(splat(F32x4, math.qnan_f32))), 0) == true); + + var f: f32 = -1.0; + var i: u32 = 0; + while (i < 8) : (i += 1) { + const r0 = acos32(f); + const r1 = math.acos(f); + const r4 = acos(splat(F32x4, f)); + const r8 = acos(splat(F32x8, f)); + const r16 = acos(splat(F32x16, f)); + try 
expect(math.approxEqAbs(f32, r0, r1, epsilon)); + try expect(approxEqAbs(r4, splat(F32x4, r1), epsilon)); + try expect(approxEqAbs(r8, splat(F32x8, r1), epsilon)); + try expect(approxEqAbs(r16, splat(F32x16, r1), epsilon)); + f += 0.09 * @intToFloat(f32, i); + } +} + +pub fn modAngle32(in_angle: f32) f32 { + const angle = in_angle + math.pi; + var temp: f32 = @fabs(angle); + temp = temp - (2.0 * math.pi * @intToFloat(f32, @floatToInt(i32, temp / math.pi))); + temp = temp - math.pi; + if (angle < 0.0) { + temp = -temp; + } + return temp; +} + +pub fn cmulSoa(re0: anytype, im0: anytype, re1: anytype, im1: anytype) [2]@TypeOf(re0, im0, re1, im1) { + const re0_re1 = re0 * re1; + const re0_im1 = re0 * im1; + return .{ + mulAdd(-im0, im1, re0_re1), // re + mulAdd(re1, im0, re0_im1), // im + }; +} +// ------------------------------------------------------------------------------ +// +// FFT (implementation based on xdsp.h from DirectXMath) +// +// ------------------------------------------------------------------------------ +fn fftButterflyDit4_1(re0: *F32x4, im0: *F32x4) void { + const re0l = swizzle(re0.*, .x, .x, .y, .y); + const re0h = swizzle(re0.*, .z, .z, .w, .w); + + const im0l = swizzle(im0.*, .x, .x, .y, .y); + const im0h = swizzle(im0.*, .z, .z, .w, .w); + + const re_temp = mulAdd(re0h, f32x4(1.0, -1.0, 1.0, -1.0), re0l); + const im_temp = mulAdd(im0h, f32x4(1.0, -1.0, 1.0, -1.0), im0l); + + const re_shuf0 = @shuffle(f32, re_temp, im_temp, [4]i32{ 2, 3, ~@as(i32, 2), ~@as(i32, 3) }); + const re_shuf = swizzle(re_shuf0, .x, .w, .x, .w); + const im_shuf = swizzle(re_shuf0, .z, .y, .z, .y); + + const re_templ = swizzle(re_temp, .x, .y, .x, .y); + const im_templ = swizzle(im_temp, .x, .y, .x, .y); + + re0.* = mulAdd(re_shuf, f32x4(1.0, 1.0, -1.0, -1.0), re_templ); + im0.* = mulAdd(im_shuf, f32x4(1.0, -1.0, -1.0, 1.0), im_templ); +} + +fn fftButterflyDit4_4( + re0: *F32x4, + re1: *F32x4, + re2: *F32x4, + re3: *F32x4, + im0: *F32x4, + im1: *F32x4, + im2: *F32x4, + 
im3: *F32x4, + unity_table_re: []const F32x4, + unity_table_im: []const F32x4, + stride: u32, + last: bool, +) void { + const re_temp0 = re0.* + re2.*; + const im_temp0 = im0.* + im2.*; + + const re_temp2 = re1.* + re3.*; + const im_temp2 = im1.* + im3.*; + + const re_temp1 = re0.* - re2.*; + const im_temp1 = im0.* - im2.*; + + const re_temp3 = re1.* - re3.*; + const im_temp3 = im1.* - im3.*; + + var re_temp4 = re_temp0 + re_temp2; + var im_temp4 = im_temp0 + im_temp2; + + var re_temp5 = re_temp1 + im_temp3; + var im_temp5 = im_temp1 - re_temp3; + + var re_temp6 = re_temp0 - re_temp2; + var im_temp6 = im_temp0 - im_temp2; + + var re_temp7 = re_temp1 - im_temp3; + var im_temp7 = im_temp1 + re_temp3; + + { + const re_im = cmulSoa(re_temp5, im_temp5, unity_table_re[stride], unity_table_im[stride]); + re_temp5 = re_im[0]; + im_temp5 = re_im[1]; + } + { + const re_im = cmulSoa(re_temp6, im_temp6, unity_table_re[stride * 2], unity_table_im[stride * 2]); + re_temp6 = re_im[0]; + im_temp6 = re_im[1]; + } + { + const re_im = cmulSoa(re_temp7, im_temp7, unity_table_re[stride * 3], unity_table_im[stride * 3]); + re_temp7 = re_im[0]; + im_temp7 = re_im[1]; + } + + if (last) { + fftButterflyDit4_1(&re_temp4, &im_temp4); + fftButterflyDit4_1(&re_temp5, &im_temp5); + fftButterflyDit4_1(&re_temp6, &im_temp6); + fftButterflyDit4_1(&re_temp7, &im_temp7); + } + + re0.* = re_temp4; + im0.* = im_temp4; + + re1.* = re_temp5; + im1.* = im_temp5; + + re2.* = re_temp6; + im2.* = im_temp6; + + re3.* = re_temp7; + im3.* = im_temp7; +} + +fn fft4(re: []F32x4, im: []F32x4, count: u32) void { + assert(std.math.isPowerOfTwo(count)); + assert(re.len >= count); + assert(im.len >= count); + + var index: u32 = 0; + while (index < count) : (index += 1) { + fftButterflyDit4_1(&re[index], &im[index]); + } +} +test "zmath.fft4" { + const epsilon = 0.0001; + var re = [_]F32x4{f32x4(1.0, 2.0, 3.0, 4.0)}; + var im = [_]F32x4{f32x4s(0.0)}; + fft4(re[0..], im[0..], 1); + + var re_uns: [1]F32x4 = undefined; + 
var im_uns: [1]F32x4 = undefined; + fftUnswizzle(re[0..], re_uns[0..]); + fftUnswizzle(im[0..], im_uns[0..]); + + try expect(approxEqAbs(re_uns[0], f32x4(10.0, -2.0, -2.0, -2.0), epsilon)); + try expect(approxEqAbs(im_uns[0], f32x4(0.0, 2.0, 0.0, -2.0), epsilon)); +} + +fn fft8(re: []F32x4, im: []F32x4, count: u32) void { + assert(std.math.isPowerOfTwo(count)); + assert(re.len >= 2 * count); + assert(im.len >= 2 * count); + + var index: u32 = 0; + while (index < count) : (index += 1) { + var pre = re[index * 2 ..]; + var pim = im[index * 2 ..]; + + var odds_re = @shuffle(f32, pre[0], pre[1], [4]i32{ 1, 3, ~@as(i32, 1), ~@as(i32, 3) }); + var evens_re = @shuffle(f32, pre[0], pre[1], [4]i32{ 0, 2, ~@as(i32, 0), ~@as(i32, 2) }); + var odds_im = @shuffle(f32, pim[0], pim[1], [4]i32{ 1, 3, ~@as(i32, 1), ~@as(i32, 3) }); + var evens_im = @shuffle(f32, pim[0], pim[1], [4]i32{ 0, 2, ~@as(i32, 0), ~@as(i32, 2) }); + fftButterflyDit4_1(&odds_re, &odds_im); + fftButterflyDit4_1(&evens_re, &evens_im); + + { + const re_im = cmulSoa( + odds_re, + odds_im, + f32x4(1.0, 0.70710677, 0.0, -0.70710677), + f32x4(0.0, -0.70710677, -1.0, -0.70710677), + ); + pre[0] = evens_re + re_im[0]; + pim[0] = evens_im + re_im[1]; + } + { + const re_im = cmulSoa( + odds_re, + odds_im, + f32x4(-1.0, -0.70710677, 0.0, 0.70710677), + f32x4(0.0, 0.70710677, 1.0, 0.70710677), + ); + pre[1] = evens_re + re_im[0]; + pim[1] = evens_im + re_im[1]; + } + } +} +test "zmath.fft8" { + const epsilon = 0.0001; + var re = [_]F32x4{ f32x4(1.0, 2.0, 3.0, 4.0), f32x4(5.0, 6.0, 7.0, 8.0) }; + var im = [_]F32x4{ f32x4s(0.0), f32x4s(0.0) }; + fft8(re[0..], im[0..], 1); + + var re_uns: [2]F32x4 = undefined; + var im_uns: [2]F32x4 = undefined; + fftUnswizzle(re[0..], re_uns[0..]); + fftUnswizzle(im[0..], im_uns[0..]); + + try expect(approxEqAbs(re_uns[0], f32x4(36.0, -4.0, -4.0, -4.0), epsilon)); + try expect(approxEqAbs(re_uns[1], f32x4(-4.0, -4.0, -4.0, -4.0), epsilon)); + try expect(approxEqAbs(im_uns[0], f32x4(0.0, 
9.656854, 4.0, 1.656854), epsilon)); + try expect(approxEqAbs(im_uns[1], f32x4(0.0, -1.656854, -4.0, -9.656854), epsilon)); +} + +fn fft16(re: []F32x4, im: []F32x4, count: u32) void { + assert(std.math.isPowerOfTwo(count)); + assert(re.len >= 4 * count); + assert(im.len >= 4 * count); + + const static = struct { + const unity_table_re = [4]F32x4{ + f32x4(1.0, 1.0, 1.0, 1.0), + f32x4(1.0, 0.92387950, 0.70710677, 0.38268343), + f32x4(1.0, 0.70710677, -4.3711388e-008, -0.70710677), + f32x4(1.0, 0.38268343, -0.70710677, -0.92387950), + }; + const unity_table_im = [4]F32x4{ + f32x4(-0.0, -0.0, -0.0, -0.0), + f32x4(-0.0, -0.38268343, -0.70710677, -0.92387950), + f32x4(-0.0, -0.70710677, -1.0, -0.70710677), + f32x4(-0.0, -0.92387950, -0.70710677, 0.38268343), + }; + }; + + var index: u32 = 0; + while (index < count) : (index += 1) { + fftButterflyDit4_4( + &re[index * 4], + &re[index * 4 + 1], + &re[index * 4 + 2], + &re[index * 4 + 3], + &im[index * 4], + &im[index * 4 + 1], + &im[index * 4 + 2], + &im[index * 4 + 3], + static.unity_table_re[0..], + static.unity_table_im[0..], + 1, + true, + ); + } +} +test "zmath.fft16" { + const epsilon = 0.0001; + var re = [_]F32x4{ + f32x4(1.0, 2.0, 3.0, 4.0), + f32x4(5.0, 6.0, 7.0, 8.0), + f32x4(9.0, 10.0, 11.0, 12.0), + f32x4(13.0, 14.0, 15.0, 16.0), + }; + var im = [_]F32x4{ f32x4s(0.0), f32x4s(0.0), f32x4s(0.0), f32x4s(0.0) }; + fft16(re[0..], im[0..], 1); + + var re_uns: [4]F32x4 = undefined; + var im_uns: [4]F32x4 = undefined; + fftUnswizzle(re[0..], re_uns[0..]); + fftUnswizzle(im[0..], im_uns[0..]); + + try expect(approxEqAbs(re_uns[0], f32x4(136.0, -8.0, -8.0, -8.0), epsilon)); + try expect(approxEqAbs(re_uns[1], f32x4(-8.0, -8.0, -8.0, -8.0), epsilon)); + try expect(approxEqAbs(re_uns[2], f32x4(-8.0, -8.0, -8.0, -8.0), epsilon)); + try expect(approxEqAbs(re_uns[3], f32x4(-8.0, -8.0, -8.0, -8.0), epsilon)); + try expect(approxEqAbs(im_uns[0], f32x4(0.0, 40.218716, 19.313708, 11.972846), epsilon)); + try 
expect(approxEqAbs(im_uns[1], f32x4(8.0, 5.345429, 3.313708, 1.591299), epsilon)); + try expect(approxEqAbs(im_uns[2], f32x4(0.0, -1.591299, -3.313708, -5.345429), epsilon)); + try expect(approxEqAbs(im_uns[3], f32x4(-8.0, -11.972846, -19.313708, -40.218716), epsilon)); +} + +fn fftN(re: []F32x4, im: []F32x4, unity_table: []const F32x4, length: u32, count: u32) void { + assert(length > 16); + assert(std.math.isPowerOfTwo(length)); + assert(std.math.isPowerOfTwo(count)); + assert(re.len >= length * count / 4); + assert(re.len == im.len); + + const total = count * length; + const total_vectors = total / 4; + const stage_vectors = length / 4; + const stage_vectors_mask = stage_vectors - 1; + const stride = length / 16; + const stride_mask = stride - 1; + const stride_inv_mask = ~stride_mask; + + var unity_table_re = unity_table; + var unity_table_im = unity_table[length / 4 ..]; + + var index: u32 = 0; + while (index < total_vectors / 4) : (index += 1) { + const n = (index & stride_inv_mask) * 4 + (index & stride_mask); + fftButterflyDit4_4( + &re[n], + &re[n + stride], + &re[n + stride * 2], + &re[n + stride * 3], + &im[n], + &im[n + stride], + &im[n + stride * 2], + &im[n + stride * 3], + unity_table_re[(n & stage_vectors_mask)..], + unity_table_im[(n & stage_vectors_mask)..], + stride, + false, + ); + } + + if (length > 16 * 4) { + fftN(re, im, unity_table[(length / 2)..], length / 4, count * 4); + } else if (length == 16 * 4) { + fft16(re, im, count * 4); + } else if (length == 8 * 4) { + fft8(re, im, count * 4); + } else if (length == 4 * 4) { + fft4(re, im, count * 4); + } +} +test "zmath.fftN" { + var unity_table: [128]F32x4 = undefined; + const epsilon = 0.0001; + + // 32 samples + { + var re = [_]F32x4{ + f32x4(1.0, 2.0, 3.0, 4.0), f32x4(5.0, 6.0, 7.0, 8.0), + f32x4(9.0, 10.0, 11.0, 12.0), f32x4(13.0, 14.0, 15.0, 16.0), + f32x4(17.0, 18.0, 19.0, 20.0), f32x4(21.0, 22.0, 23.0, 24.0), + f32x4(25.0, 26.0, 27.0, 28.0), f32x4(29.0, 30.0, 31.0, 32.0), + }; + var im 
= [_]F32x4{ + f32x4s(0.0), f32x4s(0.0), f32x4s(0.0), f32x4s(0.0), + f32x4s(0.0), f32x4s(0.0), f32x4s(0.0), f32x4s(0.0), + }; + + fftInitUnityTable(unity_table[0..32]); + fft(re[0..], im[0..], unity_table[0..32]); + + try expect(approxEqAbs(re[0], f32x4(528.0, -16.0, -16.0, -16.0), epsilon)); + try expect(approxEqAbs(re[1], f32x4(-16.0, -16.0, -16.0, -16.0), epsilon)); + try expect(approxEqAbs(re[2], f32x4(-16.0, -16.0, -16.0, -16.0), epsilon)); + try expect(approxEqAbs(re[3], f32x4(-16.0, -16.0, -16.0, -16.0), epsilon)); + try expect(approxEqAbs(re[4], f32x4(-16.0, -16.0, -16.0, -16.0), epsilon)); + try expect(approxEqAbs(re[5], f32x4(-16.0, -16.0, -16.0, -16.0), epsilon)); + try expect(approxEqAbs(re[6], f32x4(-16.0, -16.0, -16.0, -16.0), epsilon)); + try expect(approxEqAbs(re[7], f32x4(-16.0, -16.0, -16.0, -16.0), epsilon)); + try expect(approxEqAbs(im[0], f32x4(0.0, 162.450726, 80.437432, 52.744931), epsilon)); + try expect(approxEqAbs(im[1], f32x4(38.627417, 29.933895, 23.945692, 19.496056), epsilon)); + try expect(approxEqAbs(im[2], f32x4(16.0, 13.130861, 10.690858, 8.552178), epsilon)); + try expect(approxEqAbs(im[3], f32x4(6.627417, 4.853547, 3.182598, 1.575862), epsilon)); + try expect(approxEqAbs(im[4], f32x4(0.0, -1.575862, -3.182598, -4.853547), epsilon)); + try expect(approxEqAbs(im[5], f32x4(-6.627417, -8.552178, -10.690858, -13.130861), epsilon)); + try expect(approxEqAbs(im[6], f32x4(-16.0, -19.496056, -23.945692, -29.933895), epsilon)); + try expect(approxEqAbs(im[7], f32x4(-38.627417, -52.744931, -80.437432, -162.450726), epsilon)); + } + + // 64 samples + { + var re = [_]F32x4{ + f32x4(1.0, 2.0, 3.0, 4.0), f32x4(5.0, 6.0, 7.0, 8.0), + f32x4(9.0, 10.0, 11.0, 12.0), f32x4(13.0, 14.0, 15.0, 16.0), + f32x4(17.0, 18.0, 19.0, 20.0), f32x4(21.0, 22.0, 23.0, 24.0), + f32x4(25.0, 26.0, 27.0, 28.0), f32x4(29.0, 30.0, 31.0, 32.0), + f32x4(1.0, 2.0, 3.0, 4.0), f32x4(5.0, 6.0, 7.0, 8.0), + f32x4(9.0, 10.0, 11.0, 12.0), f32x4(13.0, 14.0, 15.0, 16.0), + 
f32x4(17.0, 18.0, 19.0, 20.0), f32x4(21.0, 22.0, 23.0, 24.0), + f32x4(25.0, 26.0, 27.0, 28.0), f32x4(29.0, 30.0, 31.0, 32.0), + }; + var im = [_]F32x4{ + f32x4s(0.0), f32x4s(0.0), f32x4s(0.0), f32x4s(0.0), + f32x4s(0.0), f32x4s(0.0), f32x4s(0.0), f32x4s(0.0), + f32x4s(0.0), f32x4s(0.0), f32x4s(0.0), f32x4s(0.0), + f32x4s(0.0), f32x4s(0.0), f32x4s(0.0), f32x4s(0.0), + }; + + fftInitUnityTable(unity_table[0..64]); + fft(re[0..], im[0..], unity_table[0..64]); + + try expect(approxEqAbs(re[0], f32x4(1056.0, 0.0, -32.0, 0.0), epsilon)); + var i: u32 = 1; + while (i < 16) : (i += 1) { + try expect(approxEqAbs(re[i], f32x4(-32.0, 0.0, -32.0, 0.0), epsilon)); + } + + const expected = [_]f32{ + 0.0, 0.0, 324.901452, 0.000000, 160.874864, 0.0, 105.489863, 0.000000, + 77.254834, 0.0, 59.867789, 0.0, 47.891384, 0.0, 38.992113, 0.0, + 32.000000, 0.000000, 26.261721, 0.000000, 21.381716, 0.000000, 17.104356, 0.000000, + 13.254834, 0.000000, 9.707094, 0.000000, 6.365196, 0.000000, 3.151725, 0.000000, + 0.000000, 0.000000, -3.151725, 0.000000, -6.365196, 0.000000, -9.707094, 0.000000, + -13.254834, 0.000000, -17.104356, 0.000000, -21.381716, 0.000000, -26.261721, 0.000000, + -32.000000, 0.000000, -38.992113, 0.000000, -47.891384, 0.000000, -59.867789, 0.000000, + -77.254834, 0.000000, -105.489863, 0.000000, -160.874864, 0.000000, -324.901452, 0.000000, + }; + for (expected, 0..) 
|e, ie| { + try expect(std.math.approxEqAbs(f32, e, im[(ie / 4)][ie % 4], epsilon)); + } + } + + // 128 samples + { + var re = [_]F32x4{ + f32x4(1.0, 2.0, 3.0, 4.0), f32x4(5.0, 6.0, 7.0, 8.0), + f32x4(9.0, 10.0, 11.0, 12.0), f32x4(13.0, 14.0, 15.0, 16.0), + f32x4(17.0, 18.0, 19.0, 20.0), f32x4(21.0, 22.0, 23.0, 24.0), + f32x4(25.0, 26.0, 27.0, 28.0), f32x4(29.0, 30.0, 31.0, 32.0), + f32x4(1.0, 2.0, 3.0, 4.0), f32x4(5.0, 6.0, 7.0, 8.0), + f32x4(9.0, 10.0, 11.0, 12.0), f32x4(13.0, 14.0, 15.0, 16.0), + f32x4(17.0, 18.0, 19.0, 20.0), f32x4(21.0, 22.0, 23.0, 24.0), + f32x4(25.0, 26.0, 27.0, 28.0), f32x4(29.0, 30.0, 31.0, 32.0), + f32x4(1.0, 2.0, 3.0, 4.0), f32x4(5.0, 6.0, 7.0, 8.0), + f32x4(9.0, 10.0, 11.0, 12.0), f32x4(13.0, 14.0, 15.0, 16.0), + f32x4(17.0, 18.0, 19.0, 20.0), f32x4(21.0, 22.0, 23.0, 24.0), + f32x4(25.0, 26.0, 27.0, 28.0), f32x4(29.0, 30.0, 31.0, 32.0), + f32x4(1.0, 2.0, 3.0, 4.0), f32x4(5.0, 6.0, 7.0, 8.0), + f32x4(9.0, 10.0, 11.0, 12.0), f32x4(13.0, 14.0, 15.0, 16.0), + f32x4(17.0, 18.0, 19.0, 20.0), f32x4(21.0, 22.0, 23.0, 24.0), + f32x4(25.0, 26.0, 27.0, 28.0), f32x4(29.0, 30.0, 31.0, 32.0), + }; + var im = [_]F32x4{ + f32x4s(0.0), f32x4s(0.0), f32x4s(0.0), f32x4s(0.0), + f32x4s(0.0), f32x4s(0.0), f32x4s(0.0), f32x4s(0.0), + f32x4s(0.0), f32x4s(0.0), f32x4s(0.0), f32x4s(0.0), + f32x4s(0.0), f32x4s(0.0), f32x4s(0.0), f32x4s(0.0), + f32x4s(0.0), f32x4s(0.0), f32x4s(0.0), f32x4s(0.0), + f32x4s(0.0), f32x4s(0.0), f32x4s(0.0), f32x4s(0.0), + f32x4s(0.0), f32x4s(0.0), f32x4s(0.0), f32x4s(0.0), + f32x4s(0.0), f32x4s(0.0), f32x4s(0.0), f32x4s(0.0), + }; + + fftInitUnityTable(unity_table[0..128]); + fft(re[0..], im[0..], unity_table[0..128]); + + try expect(approxEqAbs(re[0], f32x4(2112.0, 0.0, 0.0, 0.0), epsilon)); + var i: u32 = 1; + while (i < 32) : (i += 1) { + try expect(approxEqAbs(re[i], f32x4(-64.0, 0.0, 0.0, 0.0), epsilon)); + } + + const expected = [_]f32{ + 0.000000, 0.000000, 0.000000, 0.000000, 649.802905, 0.000000, 0.000000, 0.000000, + 
321.749727, 0.000000, 0.000000, 0.000000, 210.979725, 0.000000, 0.000000, 0.000000, + 154.509668, 0.000000, 0.000000, 0.000000, 119.735578, 0.000000, 0.000000, 0.000000, + 95.782769, 0.000000, 0.000000, 0.000000, 77.984226, 0.000000, 0.000000, 0.000000, + 64.000000, 0.000000, 0.000000, 0.000000, 52.523443, 0.000000, 0.000000, 0.000000, + 42.763433, 0.000000, 0.000000, 0.000000, 34.208713, 0.000000, 0.000000, 0.000000, + 26.509668, 0.000000, 0.000000, 0.000000, 19.414188, 0.000000, 0.000000, 0.000000, + 12.730392, 0.000000, 0.000000, 0.000000, 6.303450, 0.000000, 0.000000, 0.000000, + 0.000000, 0.000000, 0.000000, 0.000000, -6.303450, 0.000000, 0.000000, 0.000000, + -12.730392, 0.000000, 0.000000, 0.000000, -19.414188, 0.000000, 0.000000, 0.000000, + -26.509668, 0.000000, 0.000000, 0.000000, -34.208713, 0.000000, 0.000000, 0.000000, + -42.763433, 0.000000, 0.000000, 0.000000, -52.523443, 0.000000, 0.000000, 0.000000, + -64.000000, 0.000000, 0.000000, 0.000000, -77.984226, 0.000000, 0.000000, 0.000000, + -95.782769, 0.000000, 0.000000, 0.000000, -119.735578, 0.000000, 0.000000, 0.000000, + -154.509668, 0.000000, 0.000000, 0.000000, -210.979725, 0.000000, 0.000000, 0.000000, + -321.749727, 0.000000, 0.000000, 0.000000, -649.802905, 0.000000, 0.000000, 0.000000, + }; + for (expected, 0..) |e, ie| { + try expect(std.math.approxEqAbs(f32, e, im[(ie / 4)][ie % 4], epsilon)); + } + } +} + +fn fftUnswizzle(input: []const F32x4, output: []F32x4) void { + assert(std.math.isPowerOfTwo(input.len)); + assert(input.len == output.len); + assert(input.ptr != output.ptr); + + const log2_length = std.math.log2_int(usize, input.len * 4); + assert(log2_length >= 2); + + const length = input.len; + + const f32_output = @ptrCast([*]f32, output.ptr)[0 .. 
output.len * 4]; + + const static = struct { + const swizzle_table = [256]u8{ + 0x00, 0x40, 0x80, 0xC0, 0x10, 0x50, 0x90, 0xD0, 0x20, 0x60, 0xA0, 0xE0, 0x30, 0x70, 0xB0, 0xF0, + 0x04, 0x44, 0x84, 0xC4, 0x14, 0x54, 0x94, 0xD4, 0x24, 0x64, 0xA4, 0xE4, 0x34, 0x74, 0xB4, 0xF4, + 0x08, 0x48, 0x88, 0xC8, 0x18, 0x58, 0x98, 0xD8, 0x28, 0x68, 0xA8, 0xE8, 0x38, 0x78, 0xB8, 0xF8, + 0x0C, 0x4C, 0x8C, 0xCC, 0x1C, 0x5C, 0x9C, 0xDC, 0x2C, 0x6C, 0xAC, 0xEC, 0x3C, 0x7C, 0xBC, 0xFC, + 0x01, 0x41, 0x81, 0xC1, 0x11, 0x51, 0x91, 0xD1, 0x21, 0x61, 0xA1, 0xE1, 0x31, 0x71, 0xB1, 0xF1, + 0x05, 0x45, 0x85, 0xC5, 0x15, 0x55, 0x95, 0xD5, 0x25, 0x65, 0xA5, 0xE5, 0x35, 0x75, 0xB5, 0xF5, + 0x09, 0x49, 0x89, 0xC9, 0x19, 0x59, 0x99, 0xD9, 0x29, 0x69, 0xA9, 0xE9, 0x39, 0x79, 0xB9, 0xF9, + 0x0D, 0x4D, 0x8D, 0xCD, 0x1D, 0x5D, 0x9D, 0xDD, 0x2D, 0x6D, 0xAD, 0xED, 0x3D, 0x7D, 0xBD, 0xFD, + 0x02, 0x42, 0x82, 0xC2, 0x12, 0x52, 0x92, 0xD2, 0x22, 0x62, 0xA2, 0xE2, 0x32, 0x72, 0xB2, 0xF2, + 0x06, 0x46, 0x86, 0xC6, 0x16, 0x56, 0x96, 0xD6, 0x26, 0x66, 0xA6, 0xE6, 0x36, 0x76, 0xB6, 0xF6, + 0x0A, 0x4A, 0x8A, 0xCA, 0x1A, 0x5A, 0x9A, 0xDA, 0x2A, 0x6A, 0xAA, 0xEA, 0x3A, 0x7A, 0xBA, 0xFA, + 0x0E, 0x4E, 0x8E, 0xCE, 0x1E, 0x5E, 0x9E, 0xDE, 0x2E, 0x6E, 0xAE, 0xEE, 0x3E, 0x7E, 0xBE, 0xFE, + 0x03, 0x43, 0x83, 0xC3, 0x13, 0x53, 0x93, 0xD3, 0x23, 0x63, 0xA3, 0xE3, 0x33, 0x73, 0xB3, 0xF3, + 0x07, 0x47, 0x87, 0xC7, 0x17, 0x57, 0x97, 0xD7, 0x27, 0x67, 0xA7, 0xE7, 0x37, 0x77, 0xB7, 0xF7, + 0x0B, 0x4B, 0x8B, 0xCB, 0x1B, 0x5B, 0x9B, 0xDB, 0x2B, 0x6B, 0xAB, 0xEB, 0x3B, 0x7B, 0xBB, 0xFB, + 0x0F, 0x4F, 0x8F, 0xCF, 0x1F, 0x5F, 0x9F, 0xDF, 0x2F, 0x6F, 0xAF, 0xEF, 0x3F, 0x7F, 0xBF, 0xFF, + }; + }; + + if ((log2_length & 1) == 0) { + const rev32 = @intCast(u6, 32 - log2_length); + var index: usize = 0; + while (index < length) : (index += 1) { + const n = index * 4; + const addr = + (@intCast(usize, static.swizzle_table[n & 0xff]) << 24) | + (@intCast(usize, static.swizzle_table[(n >> 8) & 0xff]) << 16) | + (@intCast(usize, 
static.swizzle_table[(n >> 16) & 0xff]) << 8) | + @intCast(usize, static.swizzle_table[(n >> 24) & 0xff]); + f32_output[addr >> rev32] = input[index][0]; + f32_output[(0x40000000 | addr) >> rev32] = input[index][1]; + f32_output[(0x80000000 | addr) >> rev32] = input[index][2]; + f32_output[(0xC0000000 | addr) >> rev32] = input[index][3]; + } + } else { + const rev7 = @as(usize, 1) << @intCast(u6, log2_length - 3); + const rev32 = @intCast(u6, 32 - (log2_length - 3)); + var index: usize = 0; + while (index < length) : (index += 1) { + const n = index / 2; + var addr = + (((@intCast(usize, static.swizzle_table[n & 0xff]) << 24) | + (@intCast(usize, static.swizzle_table[(n >> 8) & 0xff]) << 16) | + (@intCast(usize, static.swizzle_table[(n >> 16) & 0xff]) << 8) | + (@intCast(usize, static.swizzle_table[(n >> 24) & 0xff]))) >> rev32) | + ((index & 1) * rev7 * 4); + f32_output[addr] = input[index][0]; + addr += rev7; + f32_output[addr] = input[index][1]; + addr += rev7; + f32_output[addr] = input[index][2]; + addr += rev7; + f32_output[addr] = input[index][3]; + } + } +} + +pub fn fftInitUnityTable(out_unity_table: []F32x4) void { + assert(std.math.isPowerOfTwo(out_unity_table.len)); + assert(out_unity_table.len >= 32 and out_unity_table.len <= 512); + + var unity_table = out_unity_table; + + const v0123 = f32x4(0.0, 1.0, 2.0, 3.0); + var length = out_unity_table.len / 4; + var vlstep = f32x4s(0.5 * math.pi / @intToFloat(f32, length)); + + while (true) { + length /= 4; + var vjp = v0123; + + var j: u32 = 0; + while (j < length) : (j += 1) { + unity_table[j] = f32x4s(1.0); + unity_table[j + length * 4] = f32x4s(0.0); + + var vls = vjp * vlstep; + var sin_cos = sincos(vls); + unity_table[j + length] = sin_cos[1]; + unity_table[j + length * 5] = sin_cos[0] * f32x4s(-1.0); + + var vijp = vjp + vjp; + vls = vijp * vlstep; + sin_cos = sincos(vls); + unity_table[j + length * 2] = sin_cos[1]; + unity_table[j + length * 6] = sin_cos[0] * f32x4s(-1.0); + + vijp = vijp + vjp; + vls 
= vijp * vlstep; + sin_cos = sincos(vls); + unity_table[j + length * 3] = sin_cos[1]; + unity_table[j + length * 7] = sin_cos[0] * f32x4s(-1.0); + + vjp += f32x4s(4.0); + } + vlstep *= f32x4s(4.0); + unity_table = unity_table[8 * length ..]; + + if (length <= 4) + break; + } +} + +pub fn fft(re: []F32x4, im: []F32x4, unity_table: []const F32x4) void { + const length = @intCast(u32, re.len * 4); + assert(std.math.isPowerOfTwo(length)); + assert(length >= 4 and length <= 512); + assert(re.len == im.len); + + var re_temp_storage: [128]F32x4 = undefined; + var im_temp_storage: [128]F32x4 = undefined; + var re_temp = re_temp_storage[0..re.len]; + var im_temp = im_temp_storage[0..im.len]; + + std.mem.copy(F32x4, re_temp, re); + std.mem.copy(F32x4, im_temp, im); + + if (length > 16) { + assert(unity_table.len == length); + fftN(re_temp, im_temp, unity_table, length, 1); + } else if (length == 16) { + fft16(re_temp, im_temp, 1); + } else if (length == 8) { + fft8(re_temp, im_temp, 1); + } else if (length == 4) { + fft4(re_temp, im_temp, 1); + } + + fftUnswizzle(re_temp, re); + fftUnswizzle(im_temp, im); +} + +pub fn ifft(re: []F32x4, im: []const F32x4, unity_table: []const F32x4) void { + const length = @intCast(u32, re.len * 4); + assert(std.math.isPowerOfTwo(length)); + assert(length >= 4 and length <= 512); + assert(re.len == im.len); + + var re_temp_storage: [128]F32x4 = undefined; + var im_temp_storage: [128]F32x4 = undefined; + var re_temp = re_temp_storage[0..re.len]; + var im_temp = im_temp_storage[0..im.len]; + + const rnp = f32x4s(1.0 / @intToFloat(f32, length)); + const rnm = f32x4s(-1.0 / @intToFloat(f32, length)); + + for (re, 0..) 
|_, i| { + re_temp[i] = re[i] * rnp; + im_temp[i] = im[i] * rnm; + } + + if (length > 16) { + assert(unity_table.len == length); + fftN(re_temp, im_temp, unity_table, length, 1); + } else if (length == 16) { + fft16(re_temp, im_temp, 1); + } else if (length == 8) { + fft8(re_temp, im_temp, 1); + } else if (length == 4) { + fft4(re_temp, im_temp, 1); + } + + fftUnswizzle(re_temp, re); +} +test "zmath.ifft" { + var unity_table: [512]F32x4 = undefined; + const epsilon = 0.0001; + + // 64 samples + { + var re = [_]F32x4{ + f32x4(1.0, 2.0, 3.0, 4.0), f32x4(5.0, 6.0, 7.0, 8.0), + f32x4(9.0, 10.0, 11.0, 12.0), f32x4(13.0, 14.0, 15.0, 16.0), + f32x4(17.0, 18.0, 19.0, 20.0), f32x4(21.0, 22.0, 23.0, 24.0), + f32x4(25.0, 26.0, 27.0, 28.0), f32x4(29.0, 30.0, 31.0, 32.0), + f32x4(1.0, 2.0, 3.0, 4.0), f32x4(5.0, 6.0, 7.0, 8.0), + f32x4(9.0, 10.0, 11.0, 12.0), f32x4(13.0, 14.0, 15.0, 16.0), + f32x4(17.0, 18.0, 19.0, 20.0), f32x4(21.0, 22.0, 23.0, 24.0), + f32x4(25.0, 26.0, 27.0, 28.0), f32x4(29.0, 30.0, 31.0, 32.0), + }; + var im = [_]F32x4{ + f32x4s(0.0), f32x4s(0.0), f32x4s(0.0), f32x4s(0.0), + f32x4s(0.0), f32x4s(0.0), f32x4s(0.0), f32x4s(0.0), + f32x4s(0.0), f32x4s(0.0), f32x4s(0.0), f32x4s(0.0), + f32x4s(0.0), f32x4s(0.0), f32x4s(0.0), f32x4s(0.0), + }; + + fftInitUnityTable(unity_table[0..64]); + fft(re[0..], im[0..], unity_table[0..64]); + + try expect(approxEqAbs(re[0], f32x4(1056.0, 0.0, -32.0, 0.0), epsilon)); + var i: u32 = 1; + while (i < 16) : (i += 1) { + try expect(approxEqAbs(re[i], f32x4(-32.0, 0.0, -32.0, 0.0), epsilon)); + } + + ifft(re[0..], im[0..], unity_table[0..64]); + + try expect(approxEqAbs(re[0], f32x4(1.0, 2.0, 3.0, 4.0), epsilon)); + try expect(approxEqAbs(re[1], f32x4(5.0, 6.0, 7.0, 8.0), epsilon)); + try expect(approxEqAbs(re[2], f32x4(9.0, 10.0, 11.0, 12.0), epsilon)); + try expect(approxEqAbs(re[3], f32x4(13.0, 14.0, 15.0, 16.0), epsilon)); + try expect(approxEqAbs(re[4], f32x4(17.0, 18.0, 19.0, 20.0), epsilon)); + try expect(approxEqAbs(re[5], 
f32x4(21.0, 22.0, 23.0, 24.0), epsilon)); + try expect(approxEqAbs(re[6], f32x4(25.0, 26.0, 27.0, 28.0), epsilon)); + try expect(approxEqAbs(re[7], f32x4(29.0, 30.0, 31.0, 32.0), epsilon)); + } + + // 512 samples + { + var re: [128]F32x4 = undefined; + var im = [_]F32x4{f32x4s(0.0)} ** 128; + + for (&re, 0..) |*v, i| { + const f = @intToFloat(f32, i * 4); + v.* = f32x4(f + 1.0, f + 2.0, f + 3.0, f + 4.0); + } + + fftInitUnityTable(unity_table[0..512]); + fft(re[0..], im[0..], unity_table[0..512]); + + for (re, 0..) |v, i| { + const f = @intToFloat(f32, i * 4); + try expect(!approxEqAbs(v, f32x4(f + 1.0, f + 2.0, f + 3.0, f + 4.0), epsilon)); + } + + ifft(re[0..], im[0..], unity_table[0..512]); + + for (re, 0..) |v, i| { + const f = @intToFloat(f32, i * 4); + try expect(approxEqAbs(v, f32x4(f + 1.0, f + 2.0, f + 3.0, f + 4.0), epsilon)); + } + } +} +// ------------------------------------------------------------------------------ +// +// Private functions and constants +// +// ------------------------------------------------------------------------------ +const f32x4_sign_mask1: F32x4 = F32x4{ @bitCast(f32, @as(u32, 0x8000_0000)), 0, 0, 0 }; +const f32x4_mask2: F32x4 = F32x4{ + @bitCast(f32, @as(u32, 0xffff_ffff)), + @bitCast(f32, @as(u32, 0xffff_ffff)), + 0, + 0, +}; +const f32x4_mask3: F32x4 = F32x4{ + @bitCast(f32, @as(u32, 0xffff_ffff)), + @bitCast(f32, @as(u32, 0xffff_ffff)), + @bitCast(f32, @as(u32, 0xffff_ffff)), + 0, +}; + +inline fn splatNegativeZero(comptime T: type) T { + return @splat(veclen(T), @bitCast(f32, @as(u32, 0x8000_0000))); +} +inline fn splatNoFraction(comptime T: type) T { + return @splat(veclen(T), @as(f32, 8_388_608.0)); +} +inline fn splatAbsMask(comptime T: type) T { + return @splat(veclen(T), @bitCast(f32, @as(u32, 0x7fff_ffff))); +} + +fn floatToIntAndBack(v: anytype) @TypeOf(v) { + // This routine won't handle nan, inf and numbers greater than 8_388_608.0 (will generate undefined values). 
+ @setRuntimeSafety(false); + + const T = @TypeOf(v); + const len = veclen(T); + + var vi32: [len]i32 = undefined; + comptime var i: u32 = 0; + // vcvttps2dq + inline while (i < len) : (i += 1) { + vi32[i] = @floatToInt(i32, v[i]); + } + + var vf32: [len]f32 = undefined; + i = 0; + // vcvtdq2ps + inline while (i < len) : (i += 1) { + vf32[i] = @intToFloat(f32, vi32[i]); + } + + return vf32; +} +test "zmath.floatToIntAndBack" { + { + const v = floatToIntAndBack(f32x4(1.1, 2.9, 3.0, -4.5)); + try expect(approxEqAbs(v, f32x4(1.0, 2.0, 3.0, -4.0), 0.0)); + } + { + const v = floatToIntAndBack(f32x8(1.1, 2.9, 3.0, -4.5, 2.5, -2.5, 1.1, -100.2)); + try expect(approxEqAbs(v, f32x8(1.0, 2.0, 3.0, -4.0, 2.0, -2.0, 1.0, -100.0), 0.0)); + } + { + const v = floatToIntAndBack(f32x4(math.inf_f32, 2.9, math.nan_f32, math.qnan_f32)); + try expect(v[1] == 2.0); + } +} + +pub fn approxEqAbs(v0: anytype, v1: anytype, eps: f32) bool { + const T = @TypeOf(v0, v1); + comptime var i: comptime_int = 0; + inline while (i < veclen(T)) : (i += 1) { + if (!math.approxEqAbs(f32, v0[i], v1[i], eps)) { + return false; + } + } + return true; +} + +// ------------------------------------------------------------------------------ +// This software is available under 2 licenses -- choose whichever you prefer. 
+// ------------------------------------------------------------------------------ +// ALTERNATIVE A - MIT License +// Copyright (c) 2022 Michal Ziulek and Contributors +// Permission is hereby granted, free of charge, to any person obtaining a copy of +// this software and associated documentation files (the "Software"), to deal in +// the Software without restriction, including without limitation the rights to +// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +// of the Software, and to permit persons to whom the Software is furnished to do +// so, subject to the following conditions: +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. +// ------------------------------------------------------------------------------ +// ALTERNATIVE B - Public Domain (www.unlicense.org) +// This is free and unencumbered software released into the public domain. +// Anyone is free to copy, modify, publish, use, compile, sell, or distribute this +// software, either in source code form or as a compiled binary, for any purpose, +// commercial or non-commercial, and by any means. +// In jurisdictions that recognize copyright laws, the author or authors of this +// software dedicate any and all copyright interest in the software to the public +// domain. We make this dedication for the benefit of the public at large and to +// the detriment of our heirs and successors. 
We intend this dedication to be an
+// overt act of relinquishment in perpetuity of all present and future rights to
+// this software under copyright law.
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+// ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+// ------------------------------------------------------------------------------
diff --git a/src/c.zig b/src/c.zig
new file mode 100644
index 0000000..b2aa51f
--- /dev/null
+++ b/src/c.zig
@@ -0,0 +1,9 @@
+pub usingnamespace @cImport({
+    @cInclude("glad/glad.h");
+    @cInclude("GLFW/glfw3.h");
+
+    @cDefine("STB_IMAGE_IMPLEMENTATION", "");
+    @cDefine("TINYOBJ_LOADER_C_IMPLEMENTATION", "");
+    @cInclude("loaders/stb_image.h");
+    @cInclude("loaders/tinyobj.h");
+});
diff --git a/src/gfx/Color.zig b/src/gfx/Color.zig
new file mode 100644
index 0000000..2491d4c
--- /dev/null
+++ b/src/gfx/Color.zig
@@ -0,0 +1,51 @@
+/// Evaluates one RGB channel from the HSL intermediates `p` and `q`.
+/// `t` is shifted by one unit towards [0, 1] when it lies outside that range.
+fn hue_to_rgb(p: f32, q: f32, t: f32) f32 {
+    // Zig parameters are immutable, so the wrap-around happens on a local copy.
+    var hue = t;
+    if (hue < 0.0) {
+        hue += 1.0;
+    } else if (hue > 1.0) {
+        hue -= 1.0;
+    }
+    if (hue < 1.0 / 6.0) return p + (q - p) * 6.0 * hue;
+    if (hue < 1.0 / 2.0) return q;
+    if (hue < 2.0 / 3.0) return p + (q - p) * (2.0 / 3.0 - hue) * 6.0;
+    return p;
+}
+
+/// Converts HSL to an `{ r, g, b }` vector (despite the name, no hex value is
+/// produced). `h` is scaled by 1/360 and `s`, `l` by 1/100 before use.
+fn hsl_to_hex(h: f32, s: f32, l: f32) @Vector(3, f32) {
+    const hue = h / 360.0;
+    const sat = s / 100.0;
+    const light = l / 100.0;
+    // With zero saturation every channel simply equals the lightness.
+    if (sat == 0.0) {
+        return @Vector(3, f32){ light, light, light };
+    }
+    const q = if (light < 0.5) light * (1.0 + sat) else light + sat - light * sat;
+    const p = 2.0 * light - q;
+    return @Vector(3, f32){
+        hue_to_rgb(p, q, hue + 1.0 / 3.0),
+        hue_to_rgb(p, q, hue),
+        hue_to_rgb(p, q, hue - 1.0 / 3.0),
+    };
+}
+
+/// Returns the RGB colour for the `index`-th item. The hue steps through six
+/// evenly spaced wheel slots (`index mod 6`); from the second lap onwards it
+/// is shifted by `60 / (lap + 2)` degrees. Lightness is
+/// `100 / (2 + index div 12)` percent and saturation is always 100 percent.
+pub fn color_from_index(index: i32) @Vector(3, f32) {
+    // `index` is an integer: divide and take the modulus as integers, then
+    // convert once. @divFloor(index, 6) equals floor(index / 6.0).
+    const color_wheel_cycle = @intToFloat(f32, @divFloor(index, 6));
+    const darkness_cycle = @intToFloat(f32, @divFloor(index, 12));
+    const spacing: f32 = 360.0 / 6.0;
+    const offset: f32 = if (color_wheel_cycle == 0.0) 0.0 else spacing / (color_wheel_cycle + 2.0);
+    const hue = spacing * @intToFloat(f32, @mod(index, 6)) + offset;
+    const saturation: f32 = 100.0;
+    const lightness: f32 = 1.0 / (2.0 + darkness_cycle) * 100.0;
+    return hsl_to_hex(hue, saturation, lightness);
+}
diff --git a/src/gfx/Mesh.zig b/src/gfx/Mesh.zig
new file mode 100644
index 0000000..bac0bf5
--- /dev/null
+++ b/src/gfx/Mesh.zig
@@ -0,0 +1,100 @@
+const std = @import("std");
+const c = @import("../c.zig");
+const djleddaGeom = @import("djleddaGeom.zig");
+
+pub const Mesh = struct {
+    vao: c_uint,
+    vbo_xyz: c_uint,
+    vbo_uv: c_uint,
+    vbo_norm: c_uint,
+    ebo: c_uint,
+    num_indices: c_uint,
+
+    /// Uploads `shape`'s positions, UVs and indices into freshly generated GL
+    /// objects (one VAO, two VBOs, one EBO) and leaves the VAO bound.
+    /// NOTE(review): the populated `Mesh` is a local that is never returned, so
+    /// the GL handles are unreachable afterwards; presumably it should be returned.
+    pub fn from_shape(shape: *const djleddaGeom.Shape) void {
+        // The handles are written through pointers below, so this must be a `var`.
+        var mesh: Mesh = undefined;
+        mesh.num_indices = @intCast(c_uint, shape.indices.len);
+        c.glGenVertexArrays(1, &mesh.vao);
+        c.glGenBuffers(1, &mesh.vbo_xyz);
+        c.glGenBuffers(1, &mesh.vbo_uv);
+        c.glGenBuffers(1, &mesh.ebo);
+
+        c.glBindVertexArray(mesh.vao);
+
+        c.glBindBuffer(c.GL_ARRAY_BUFFER, mesh.vbo_xyz);
+        // Buffer sizes are byte counts: element count times element size.
+        c.glBufferData(c.GL_ARRAY_BUFFER, @intCast(c.GLsizeiptr, shape.xyz.len * @sizeOf(f32)), shape.xyz.ptr, c.GL_STATIC_DRAW);
+        c.glVertexAttribPointer(0, 3, c.GL_FLOAT, c.GL_FALSE, 3 * @sizeOf(f32), null);
+        c.glEnableVertexAttribArray(0);
+
+        c.glBindBuffer(c.GL_ARRAY_BUFFER, mesh.vbo_uv);
+        c.glBufferData(c.GL_ARRAY_BUFFER, @intCast(c.GLsizeiptr, shape.uv.len * @sizeOf(f32)), shape.uv.ptr, c.GL_STATIC_DRAW);
+        c.glVertexAttribPointer(1, 2, c.GL_FLOAT, c.GL_FALSE, 2 * @sizeOf(f32), null);
+        c.glEnableVertexAttribArray(1);
+
+        c.glBindBuffer(c.GL_ELEMENT_ARRAY_BUFFER, mesh.ebo);
+        c.glBufferData(c.GL_ELEMENT_ARRAY_BUFFER, @intCast(c.GLsizeiptr, shape.indices.len * @sizeOf(c_uint)), shape.indices.ptr, c.GL_STATIC_DRAW);
+    }
+
+//    pub fn init(obj_file: *[]const u8) void {
+//        const reader = c.tinyobj.ObjReader();
+//        const success = reader.ParseFromFile(obj_file);
+//        std.debug.print("{}\n", .{reader.Error()});
+//
+//        const 
attrib = reader.GetAttrib(); +// +// const indices_t = reader.GetShapes().at(0).mesh.indices; +// const indices = ArrayList(c_uint)(indices_t.size()); +// +// const vertices = ArrayList()(3*indices_t.size()); +// const normals = ArrayList()(3*indices_t.size()); +// const texcoords = ArrayList()(2*indices_t.size()); +// +// for (int i = 0; i < indices_t.size(); i++) { +// const vertex_data = indices_t[i]; +// vertices[3*i] = attrib.vertices[3*vertex_data.vertex_index]; +// vertices[3*i+1] = attrib.vertices[3*vertex_data.vertex_index + 1]; +// vertices[3*i+2] = attrib.vertices[3*vertex_data.vertex_index + 2]; +// +// normals[3*i] = attrib.normals[3*vertex_data.normal_index]; +// normals[3*i+1] = attrib.normals[3*vertex_data.normal_index + 1]; +// normals[3*i+2] = attrib.normals[3*vertex_data.normal_index + 2]; +// +// texcoords[2*i] = attrib.texcoords[2*vertex_data.texcoord_index]; +// texcoords[2*i+1] = attrib.texcoords[2*vertex_data.texcoord_index + 1]; +// +// indices[i] = i; +// } +// +// num_indices = indices_t.size(); +// glGenVertexArrays(1, &vao); +// glGenBuffers(1, &vbo_xyz); +// glGenBuffers(1, &vbo_uv); +// glGenBuffers(1, &vbo_norm); +// //glGenBuffers(1, &ebo); +// +// glBindVertexArray(vao); +// +// glBindBuffer(GL_ARRAY_BUFFER, vbo_xyz); +// glBufferData(GL_ARRAY_BUFFER, vertices.size() * sizeof(float), vertices.data(), GL_STATIC_DRAW); +// glVertexAttribPointer(0, 3, GL_FLOAT, GL_FALSE, 3 * sizeof(float), (void*)0); +// glEnableVertexAttribArray(0); +// +// glBindBuffer(GL_ARRAY_BUFFER, vbo_uv); +// glBufferData(GL_ARRAY_BUFFER, texcoords.size() * sizeof(float), texcoords.data(), GL_STATIC_DRAW); +// glVertexAttribPointer(1, 2, GL_FLOAT, GL_FALSE, 2 * sizeof(float), (void*)0); +// glEnableVertexAttribArray(1); +// +// glBindBuffer(GL_ARRAY_BUFFER, vbo_norm); +// glBufferData(GL_ARRAY_BUFFER, normals.size() * sizeof(float), normals.data(), GL_STATIC_DRAW); +// glVertexAttribPointer(2, 3, GL_FLOAT, GL_FALSE, 3 * sizeof(float), (void*)0); +// 
glEnableVertexAttribArray(2);
+//
+//        //glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, ebo);
+//        //glBufferData(GL_ELEMENT_ARRAY_BUFFER, indices.size() * sizeof(unsigned int), indices.data(), GL_STATIC_DRAW);
+//    }
+};
diff --git a/src/gfx/Shader.zig b/src/gfx/Shader.zig
new file mode 100644
index 0000000..26bcce5
--- /dev/null
+++ b/src/gfx/Shader.zig
@@ -0,0 +1,76 @@
+const c = @import("../c.zig");
+const std = @import("std");
+
+const ShaderType = enum(u32) {
+    fragment = c.GL_FRAGMENT_SHADER,
+    vertex = c.GL_VERTEX_SHADER,
+};
+
+/// Reads the GLSL file at the absolute path `file_path`, compiles it as a
+/// shader of `shader_type` and returns the GL shader handle. On a compile
+/// failure the GL info log is copied into `info_log` and printed; the handle
+/// is still returned. File-system errors are returned to the caller.
+fn create_shader(file_path: []const u8, shader_type: ShaderType, info_log: []u8, allocator: std.mem.Allocator) !c_uint {
+    const file = try std.fs.openFileAbsolute(file_path, .{});
+    defer file.close();
+
+    // Same 1 MiB cap the original passed to readAllArrayList.
+    const shader_code = try file.readToEndAlloc(allocator, 1024 * 1024);
+    defer allocator.free(shader_code);
+
+    const shader = c.glCreateShader(@enumToInt(shader_type));
+    // The buffer is not NUL-terminated, so GL is given an explicit length.
+    const source_ptr: [*c]const u8 = shader_code.ptr;
+    const source_len = @intCast(c.GLint, shader_code.len);
+    c.glShaderSource(shader, 1, &source_ptr, &source_len);
+    c.glCompileShader(shader);
+
+    var success: c.GLint = 0;
+    c.glGetShaderiv(shader, c.GL_COMPILE_STATUS, &success);
+    // GL_COMPILE_STATUS is GL_FALSE on failure; the previous `!= 0` test was inverted.
+    if (success == c.GL_FALSE) {
+        c.glGetShaderInfoLog(shader, @intCast(c.GLsizei, info_log.len), null, info_log.ptr);
+        const shader_type_name = if (shader_type == ShaderType.fragment) "FRAGMENT" else "VERTEX";
+        std.debug.print("ERROR::SHADER::{s}::COMPILATION_FAILED\n{s}\n", .{ shader_type_name, std.mem.sliceTo(info_log, 0) });
+    }
+
+    return shader;
+}
+
+pub const Shader = struct {
+    prog_id: c_uint,
+
+    /// Compiles the vertex and fragment shaders at the given paths, links
+    /// them into a new GL program and stores its handle in `self.prog_id`.
+    /// Every failure is reported with std.debug.print; nothing is returned.
+    /// NOTE(review): `self` is a pointer because the original by-value
+    /// `self: Shader` cannot be assigned through.
+    pub fn init(self: *Shader, vertex_path: *[]const u8, fragment_path: *[]const u8, allocator: *std.mem.Allocator) void {
+        // Zero-filled so the log can always be sliced at its first NUL.
+        var info_log = [_]u8{0} ** 512;
+        const vertex_shader = create_shader(vertex_path.*, ShaderType.vertex, &info_log, allocator.*) catch |err| {
+            std.debug.print("ERROR::SHADER::VERTEX::LOAD_FAILED\n{s}\n", .{@errorName(err)});
+            return;
+        };
+        // The shader objects are only needed until the link below, so they
+        // are released on every exit path.
+        defer c.glDeleteShader(vertex_shader);
+        const fragment_shader = create_shader(fragment_path.*, ShaderType.fragment, &info_log, allocator.*) catch |err| {
+            std.debug.print("ERROR::SHADER::FRAGMENT::LOAD_FAILED\n{s}\n", .{@errorName(err)});
+            return;
+        };
+        defer c.glDeleteShader(fragment_shader);
+
+        self.prog_id = c.glCreateProgram();
+        c.glAttachShader(self.prog_id, vertex_shader);
+        c.glAttachShader(self.prog_id, fragment_shader);
+        c.glLinkProgram(self.prog_id);
+
+        var success: c.GLint = 0;
+        c.glGetProgramiv(self.prog_id, c.GL_LINK_STATUS, &success);
+        if (success == c.GL_FALSE) {
+            c.glGetProgramInfoLog(self.prog_id, info_log.len, null, &info_log);
+            std.debug.print("ERROR::SHADER::PROGRAM::LINK_FAILED\n{s}\n", .{std.mem.sliceTo(&info_log, 0)});
+        }
+    }
+};
diff --git a/src/gfx/djleddaGeom.zig b/src/gfx/djleddaGeom.zig
new file mode 100644
index 0000000..eb95d8e
--- /dev/null
+++ b/src/gfx/djleddaGeom.zig
@@ -0,0 +1,57 @@
+// Buffer layout:
+// X, Y, Z, U, V
+
+pub const Shape = struct {
+    indices: []c_uint,
+    uv: []f32,
+    xyz: []f32,
+};
+
+const triangle_vertices = []f32{
+    -0.5, -0.5, 0.0, 1.0, 1.0,
+    0.5, -0.5, 0.0, 0.5, 0.5,
+    0.0, 0.5, 0.0, 0.0, 0.0,
+};
+
+const triangle_indices = []c_uint{ 0, 1, 2 };
+
+const cube_vertices = []f32{ -0.5, -0.5, -0.5, 0.0, 0.0, 0.5, -0.5, -0.5, 1.0, 0.0, 0.5, 0.5, -0.5, 1.0, 1.0, 0.5, 0.5, -0.5, 1.0, 1.0, -0.5, 0.5, -0.5, 0.0, 1.0, -0.5, -0.5, -0.5, 0.0, 0.0, -0.5, -0.5, 0.5, 0.0, 0.0, 0.5, -0.5, 0.5, 1.0, 0.0, 0.5, 0.5, 0.5, 1.0, 1.0, 0.5, 0.5, 0.5, 1.0, 1.0, -0.5, 0.5, 0.5, 0.0, 1.0, -0.5, -0.5, 0.5, 0.0, 0.0, -0.5, 0.5, 0.5, 1.0, 0.0, -0.5, 0.5, -0.5, 1.0, 1.0, -0.5, -0.5, -0.5, 0.0, 1.0, -0.5, -0.5, -0.5, 0.0, 1.0, -0.5, -0.5, 0.5, 0.0, 0.0, -0.5, 0.5, 0.5, 1.0, 0.0, 0.5, 0.5, 0.5, 1.0, 0.0, 0.5, 0.5, -0.5, 1.0, 1.0, 0.5, -0.5, -0.5, 0.0, 1.0, 0.5, -0.5, -0.5, 0.0, 1.0, 0.5, -0.5, 0.5, 0.0, 0.0, 0.5, 0.5, 0.5, 1.0, 0.0, -0.5, -0.5, -0.5, 0.0, 1.0, 0.5, -0.5, -0.5, 1.0, 1.0, 0.5, -0.5, 0.5, 1.0, 0.0, 0.5, -0.5, 0.5, 1.0, 0.0, -0.5, -0.5, 0.5, 0.0, 0.0, -0.5, -0.5, -0.5, 0.0, 1.0, -0.5, 0.5, -0.5, 0.0, 1.0, 0.5, 0.5, -0.5, 1.0, 1.0, 0.5, 0.5, 0.5, 1.0, 0.0, 0.5, 0.5, 0.5, 1.0, 0.0, -0.5, 0.5, 0.5, 0.0, 0.0, -0.5, 0.5, -0.5, 0.0, 1.0 };
+
+const cube_indices = []c_uint{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 
25, 26, 27, 28, 29, 30, 31 }; + +const square_xyz = []f32{ + 0.5, 0.5, 0.0, + 0.5, -0.5, 0.0, + -0.5, -0.5, 0.0, + -0.5, 0.5, 0.0, +}; + +const square_uv = []f32{ + 1.0, 1.0, + 1.0, 0.0, + 0.0, 0.0, + 0.0, 1.0, +}; + +const square_indices = []c_uint{ + 0, 1, 3, + 1, 2, 3, +}; + +pub const TRIANGLE = Shape{ + .indices = triangle_indices, + .uv = triangle_vertices, + .xyz = triangle_vertices, +}; + +pub const SQUARE = Shape{ + .indices = square_indices, + .uv = square_uv, + .xyz = square_xyz, +}; + +pub const CUBE = Shape{ + .indices = cube_indices, + .uv = triangle_vertices, + .xyz = triangle_vertices, +}; diff --git a/src/main.cpp b/src/main.cpp index b88847c..44d9521 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -1,6 +1,5 @@ #include #include -#include #include #include #include @@ -10,6 +9,7 @@ #include #include "glad/glad.h" +#include #include #include #include diff --git a/src/main.zig b/src/main.zig new file mode 100644 index 0000000..7214248 --- /dev/null +++ b/src/main.zig @@ -0,0 +1,402 @@ +const std = @import("std"); +const c = @import("c.zig"); +const zm = @import("zm"); + +const Mesh = @import("gfx/Mesh.zig").Mesh; + +const ArrayList = std.ArrayList; + +fn print_mat(matrix: *const zm.Mat) void { + std.debug.print("{}, {}, {}, {}\n", .{ matrix[0][0], matrix[0][1], matrix[0][2], matrix[0][3] }); + std.debug.print("{}, {}, {}, {}\n", .{ matrix[1][0], matrix[1][1], matrix[1][2], matrix[1][3] }); + std.debug.print("{}, {}, {}, {}\n", .{ matrix[2][0], matrix[2][1], matrix[2][2], matrix[2][3] }); + std.debug.print("{}, {}, {}, {}\n", .{ matrix[3][0], matrix[3][1], matrix[3][2], matrix[3][3] }); +} + +const Camera = struct { + view: zm.Mat = .{ zm.f32x4s(0.0), zm.f32x4s(0.0), zm.f32x4s(0.0), zm.f32x4s(0.0) }, + proj: zm.Mat, + pos: zm.Vec = zm.f32x4s(0.0), + up: zm.Vec = zm.f32x4s(0.0), + target: zm.Vec, + + pub fn init(self: Camera, aspect_ratio: f32) void { + self.proj = zm.perspectiveFovRh(std.math.degreesToRadians(45.0), aspect_ratio, 0.1, 100.0); + } + + 
pub fn new(aspect_ratio: f32) Camera { + const cam = Camera{}; + init(cam, aspect_ratio); + return cam; + } + + pub fn look_at(self: Camera, x: f32, y: f32, z: f32) void { + self.target = zm.f32x4(x, y, z, 0.0); + self.view = zm.lookAtRh(self.pos, self.target, self.up); + } + + pub fn set_up(self: Camera, up_x: f32, up_y: f32, up_z: f32) void { + self.up = zm.f32x4(up_x, up_y, up_z, 0.0); + } +}; + +const GlobalAppState = struct { + current_polycube: i32, + last_polycube_visible: i32, + active_shader: ?*Shader, + polycubes: ArrayList(Polycube), +}; + +const app_state: GlobalAppState = .{}; + +const WindowDims = struct { + width: u32, + height: u32, +}; + +const Entity = struct { + mesh: *Mesh, + tex: *Texture, + visible: bool, + scene_graph_node: i32, +}; + +const SceneGraphNode = struct { + local: zm.Mat, + world: zm.Mat, + translation: zm.Vec, + rotation: zm.Quat, + scale: zm.Vec, + children: ArrayList(i32), + entity: ?i32, + + pub fn reset(self: SceneGraphNode) void { + self.scale = zm.f32x4(1.0, 1.0, 1.0, 0.0); + self.translation = zm.f32x4s(0.0); + self.rotation = zm.f32x4s(0.0); + } + + pub fn init(self: SceneGraphNode) void { + self.reset(); + self.local = zm.identity(); + self.world = self.local; + } + + pub fn update_local(self: SceneGraphNode) void { + const scaling = zm.scaling(self.scale); + const translation = zm.translation(self.translation); + const rotation = zm.quatToMat(self.rotation); + self.local = zm.mul(zm.mul(translation, rotation), scaling); + self.local = scaling( + zm.translate( + zm.identity(), + self.translation + ) * toMat4(self.rotation), + self.scale + ); + } +}; + + +const Polycube = struct { + graph_node: i32, + color: zm.Vec, + + pub fn show(self: Polycube) void { + const node = get_scene_graph_node(self.graph_node); + for (node.children.items) |child_id| { + const child_node = get_scene_graph_node(child_id); + if (child_node.entity) |entity_id| { + get_entity(entity_id).visible = true; + } + } + } + + pub fn hide(self: Polycube) 
void { + const node = get_scene_graph_node(self.graph_node); + for (node.children.items) |child_id| { + const child_node = get_scene_graph_node(child_id); + if (child_node.entity) |entity_id| { + get_entity(entity_id).visible = false; + } + } + } + + pub fn get_centre(self: Polycube) zm.Vec { + const centre = zm.Vec(0.0); + for (get_scene_graph_node(self.graph_node).children.items) |child_id| { + centre += get_scene_graph_node(child_id).translation; + } + centre /= get_scene_graph_node(self.graph_node).children.size(); + return centre; + } +}; + +const Frame = struct { + width: i32, + height: i32, + x: i32, + y: i32, + cam: *Camera, + + pub fn new(camera: *Camera, width: i32, height: i32) Frame { + const frame = Frame{}; + camera.init(@as(f32, width) / @as(f32, height)); + frame.cam = camera; + return frame; + } +}; + +fn framebuffer_size_callback(width: i32, height: i32) void { + c.glViewport(0, 0, width, height); +} + +fn init_window_and_gl(window_dims: *WindowDims) ?*c.GLFWwindow { + c.glfwInit(); + c.glfwWindowHint(c.GLFW_CONTEXT_VERSION_MAJOR, 4); + c.glfwWindowHint(c.GLFW_CONTEXT_VERSION_MINOR, 6); + c.glfwWindowHint(c.GLFW_OPENGL_PROFILE, c.GLFW_OPENGL_CORE_PROFILE); + const window = c.glfwCreateWindow(window_dims.width, window_dims.height, "Somaesque", c.NULL, c.NULL); + if (window == c.NULL) { + std.debug.print("Failed to create GLFW window"); + c.glfwTerminate(); + return null; + } + c.glfwMakeContextCurrent(window); + + if (!c.gladLoadGLLoader(@as(c.GLADloadproc, c.glfwGetProcAddress))) { + std.debug.print("Failed to initialize GLAD"); + return null; + } + + c.glViewport(0, 0, 800, 600); + c.glfwSetFramebufferSizeCallback(window, framebuffer_size_callback); + c.glEnable(c.GL_DEPTH_TEST); + return window; +} + +fn gl_update_viewport(window_dims: *WindowDims, frame: *Frame) void { + c.glViewport(frame.x, window_dims.height - frame.y - frame.height, frame.width, frame.height); +} + +const cube_mesh = Mesh{}; +const wall_tex = Texture{}; +const entities = 
ArrayList(Entity); +const scene_graph_nodes = ArrayList(SceneGraphNode); + +fn process_input(window: *c.GLFWwindow) void { + const static = struct { + wireframe: bool = false, + last_frame_state_press_enter: bool = false, + last_frame_state_press: bool = false, + }; + + if (c.glfwGetKey(window, c.GLFW_KEY_ESCAPE) == c.GLFW_PRESS) { + c.glfwSetWindowShouldClose(window, true); + } + + if (c.glfwGetKey(window, c.GLFW_KEY_SPACE) == c.GLFW_PRESS and !static.last_frame_state_press) { + c.glPolygonMode(c.GL_FRONT_AND_BACK, if (!static.wireframe) c.GL_LINE else c.GL_FILL); + static.wireframe = !static.wireframe; + static.last_frame_state_press = true; + } else if (c.glfwGetKey(window, c.GLFW_KEY_SPACE) == c.GLFW_RELEASE) { + static.last_frame_state_press = false; + } + + if (c.glfwGetKey(window, c.GLFW_KEY_ENTER) == c.GLFW_PRESS and !static.last_frame_state_press_enter) { + if (app_state.current_polycube == 6) { + app_state.current_polycube = 0; + } else { + app_state.current_polycube += 1; + } + static.last_frame_state_press_enter = true; + } else if (c.glfwGetKey(window, c.GLFW_KEY_ENTER) == c.GLFW_RELEASE) { + static.last_frame_state_press_enter = false; + } +} + + +fn new_entity() i32 { + entities.append(.{}); + scene_graph_nodes.append(.{}); + entities.items[entities.items.len - 1].scene_graph_node = scene_graph_nodes.items.len; + scene_graph_nodes.items[scene_graph_nodes.items.len - 1].entity = entities.items.len; + return entities.items.len; +} + +fn get_entity(id: i32) ?*Entity { + if (entities.items[id - 1]) { + return &entities.items[id - 1]; + } + return null; +} + +fn get_scene_graph_node(id: i32) *SceneGraphNode { + if (scene_graph_nodes.items[id - 1]) { + return &scene_graph_nodes.items[id - 1]; + } + return null; +} + +fn new_graph_node() i32 { + scene_graph_nodes.append(.{}); + return scene_graph_nodes.items.len; +} + +fn draw_entity(entity: *Entity) void { + const modelUniformLoc = c.glGetUniformLocation(app_state.active_shader.prog_id, "model"); + 
c.glUniformMatrix4fv(modelUniformLoc, 1, c.GL_FALSE, &get_scene_graph_node(entity.scene_graph_node).world);
+    c.glBindTexture(c.GL_TEXTURE_2D, entity.tex.tex_id);
+    c.glBindVertexArray(entity.mesh.vao);
+    c.glDrawArrays(c.GL_TRIANGLES, 0, entity.mesh.num_indices);
+    //c.glDrawElements(c.GL_TRIANGLES, entity.mesh.num_indices, c.GL_UNSIGNED_INT, 0);
+}
+/// Builds the scene-graph subtree for one polycube: a parent node plus one child entity per filled voxel of `repr`.
+fn create_polycube_from_repr(repr: *Voxel.Space) Polycube {
+    const polycube_id = new_graph_node();
+    get_scene_graph_node(polycube_id).init();
+    var x: usize = 1; // NOTE(review): scans start at 1 as before, so index 0 is never visited -- confirm intent
+    while (x < repr.dim_x) : (x += 1) {
+        var y: usize = 1; // reset for every x; a counter shared across passes stays exhausted after the first one
+        while (y < repr.dim_y) : (y += 1) {
+            var z: usize = 1; // reset for every (x, y), for the same reason
+            while (z < repr.dim_z) : (z += 1) {
+                if (Voxel.filledAt(repr, x, y, z)) {
+                    const polycube_segment = get_entity(new_entity());
+                    polycube_segment.mesh = &cube_mesh;
+                    polycube_segment.tex = &wall_tex;
+                    const graph_node = get_scene_graph_node(polycube_segment.scene_graph_node);
+                    graph_node.init();
+                    graph_node.translation = zm.f32x4(
+                        -((repr.dim_z - 1)/2.0) + z,
+                        ((repr.dim_x - 1)/2.0) - x,
+                        -((repr.dim_y - 1)/2.0) + y,
+                        0.0,
+                    );
+                    graph_node.update_local();
+                    get_scene_graph_node(polycube_id).children.append(polycube_segment.scene_graph_node);
+                }
+            }
+        }
+    }
+    const result = Polycube{
+        .graph_node = polycube_id,
+        .color = zm.f32x4s(1.0),
+    };
+    return result;
+}
+/// Recomputes `world` for every descendant of `top` as `parent.world * child.local`, depth first.
+fn recalculate_scene_graph(top: *SceneGraphNode) void {
+    if (top.children.items.len == 0) {
+        return;
+    }
+    for (top.children.items) |child_id| {
+        const graph_node = get_scene_graph_node(child_id);
+        graph_node.update_local();
+        graph_node.world = zm.mul(top.world, graph_node.local);
+        recalculate_scene_graph(graph_node);
+    }
+}
+/// Entry point: creates the window, builds one polycube per SomaSolve.STD_SOMA entry, then draws visible entities until the window closes.
+pub fn main() void {
+    var window_dims = WindowDims{ .width = 800, .height = 600 };
+    const window = init_window_and_gl(&window_dims);
+    if (window == null) {
+        return;
+    }
+
+    app_state = GlobalAppState{
+        .current_polycube=0,
+        .last_polycube_visible=6,
+        .active_shader=null,
+        .polycubes={},
+    };
+    // NOTE(review): Shader, Texture, Voxel/voxel, SomaSolve and color are referenced in this file without a visible import.
+    const phong_shader = Shader{};
+    phong_shader.init("../assets/shaders/phong-solid.vertex.glsl", "../assets/shaders/phong-solid.fragment.glsl");
+    app_state.active_shader = &phong_shader;
+
+    cube_mesh.init("../assets/models/c000000.obj");
+    wall_tex.init("../assets/textures/brick-wall.jpg");
+
+    const little_frame = Frame{ .width=80, .height=60, .x=20, .y=20 };
+    const big_frame = Frame{ .width=800, .height=600, .x=0, .y=0 };
+    const main_cam = Camera{};
+    const other_cam = Camera{};
+    little_frame.init(&other_cam);
+    big_frame.init(&main_cam);
+    const frames = [_]*Frame{ &big_frame, &little_frame };
+
+    const root_node = SceneGraphNode{};
+    root_node.init();
+
+    var i: usize = 0;
+    while (i < SomaSolve.STD_SOMA.items.len) : (i += 1) {
+        const voxel_space = voxel.Space{ SomaSolve.STD_SOMA[i], 3, 3, 3 };
+        voxel.cullEmptySpace(&voxel_space);
+        const polycube = create_polycube_from_repr(&voxel_space);
+        polycube.color = color.color_from_index(i);
+        app_state.polycubes.append(polycube);
+        root_node.children.append(app_state.polycubes.items[app_state.polycubes.items.len - 1].graph_node);
+    }
+
+    main_cam.pos = zm.f32x4(4.0, 4.0, 4.0, 0.0);
+    main_cam.look_at(0.0, 0.0, 0.0);
+
+    const light_pos = zm.f32x4(6.0, 6.0, 6.0, 0.0);
+
+    c.glUseProgram(app_state.active_shader.prog_id);
+    const view_loc = c.glGetUniformLocation(app_state.active_shader.prog_id, "view");
+    const proj_loc = c.glGetUniformLocation(app_state.active_shader.prog_id, "projection");
+    const light_pos_loc = c.glGetUniformLocation(app_state.active_shader.prog_id, "light_pos");
+    c.glUniform3fv(light_pos_loc, 1, &light_pos);
+    c.glUniformMatrix4fv(proj_loc, 1, c.GL_FALSE, &main_cam.proj);
+    c.glUniformMatrix4fv(view_loc, 1, c.GL_FALSE, &main_cam.view);
+
+    var last_frame = c.glfwGetTime();
+    var time_delta = 1.0/60.0;
+    while (!c.glfwWindowShouldClose(window)) {
+        time_delta = c.glfwGetTime() - last_frame;
+        process_input(window);
+
+        if (app_state.last_polycube_visible != app_state.current_polycube) {
+            app_state.polycubes.items[@intCast(usize, app_state.last_polycube_visible)].hide();
+            app_state.polycubes.items[@intCast(usize, app_state.current_polycube)].show();
+            app_state.last_polycube_visible = app_state.current_polycube;
+        }
+
+        c.glClearColor(0.0, 0.0, 0.0, 1.0);
+        c.glClear(c.GL_DEPTH_BUFFER_BIT | c.GL_COLOR_BUFFER_BIT);
+
+        gl_update_viewport(&window_dims, &big_frame);
+        const current_polycube = &app_state.polycubes.items[@intCast(usize, app_state.current_polycube)];
+        get_scene_graph_node(current_polycube.graph_node).rotation = zm.quatFromRollPitchYaw(0.0, c.glfwGetTime() / 2.0, 0.0);
+
+        c.glBindVertexArray(cube_mesh.vao);
+        //glBindTexture(GL_TEXTURE_2D, entity.tex->tex_id);
+        recalculate_scene_graph(&root_node);
+        const model_uniform_loc = c.glGetUniformLocation(app_state.active_shader.prog_id, "model");
+        const solid_color_loc = c.glGetUniformLocation(app_state.active_shader.prog_id, "solid_color");
+        c.glUniform3fv(solid_color_loc, 1, &current_polycube.color);
+        for (entities.items) |entity| {
+            if (entity.visible) {
+                c.glUniformMatrix4fv(model_uniform_loc, 1, c.GL_FALSE, &get_scene_graph_node(entity.scene_graph_node).world);
+                c.glDrawArrays(c.GL_TRIANGLES, 0, entity.mesh.num_indices);
+                //glDrawElements(GL_TRIANGLES, entity->mesh->num_indices, GL_UNSIGNED_INT, 0);
+            }
+        }
+
+        c.glfwSwapBuffers(window);
+        c.glfwPollEvents();
+    }
+
+    c.glfwTerminate();
+    return;
+}
+
+//test "simple test" {
+//    var list = std.ArrayList(i32).init(std.testing.allocator);
+//    defer list.deinit(); // try commenting this out and see if zig detects the memory leak!
+// try list.append(42); +// try std.testing.expectEqual(@as(i32, 42), list.pop()); +//} diff --git a/vendor/loaders/stb_image.cpp b/vendor/loaders/stb_image.cpp deleted file mode 100644 index 8ddfd1f..0000000 --- a/vendor/loaders/stb_image.cpp +++ /dev/null @@ -1,2 +0,0 @@ -#define STB_IMAGE_IMPLEMENTATION -#include "stb_image.h" diff --git a/vendor/loaders/tinyobj.cpp b/vendor/loaders/tinyobj.cpp deleted file mode 100644 index bded088..0000000 --- a/vendor/loaders/tinyobj.cpp +++ /dev/null @@ -1,2 +0,0 @@ -#define TINYOBJLOADER_IMPLEMENTATION -#include "tinyobj.h" diff --git a/vendor/loaders/tinyobj.h b/vendor/loaders/tinyobj.h deleted file mode 100644 index 3d86b90..0000000 --- a/vendor/loaders/tinyobj.h +++ /dev/null @@ -1,3455 +0,0 @@ -/* -The MIT License (MIT) - -Copyright (c) 2012-Present, Syoyo Fujita and many contributors. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -// -// version 2.0.0 : Add new object oriented API. 1.x API is still provided. 
-// * Support line primitive. -// * Support points primitive. -// * Support multiple search path for .mtl(v1 API). -// * Support vertex weight `vw`(as an tinyobj extension) -// * Support escaped whitespece in mtllib -// * Add robust triangulation using Mapbox earcut(TINYOBJLOADER_USE_MAPBOX_EARCUT). -// version 1.4.0 : Modifed ParseTextureNameAndOption API -// version 1.3.1 : Make ParseTextureNameAndOption API public -// version 1.3.0 : Separate warning and error message(breaking API of LoadObj) -// version 1.2.3 : Added color space extension('-colorspace') to tex opts. -// version 1.2.2 : Parse multiple group names. -// version 1.2.1 : Added initial support for line('l') primitive(PR #178) -// version 1.2.0 : Hardened implementation(#175) -// version 1.1.1 : Support smoothing groups(#162) -// version 1.1.0 : Support parsing vertex color(#144) -// version 1.0.8 : Fix parsing `g` tag just after `usemtl`(#138) -// version 1.0.7 : Support multiple tex options(#126) -// version 1.0.6 : Add TINYOBJLOADER_USE_DOUBLE option(#124) -// version 1.0.5 : Ignore `Tr` when `d` exists in MTL(#43) -// version 1.0.4 : Support multiple filenames for 'mtllib'(#112) -// version 1.0.3 : Support parsing texture options(#85) -// version 1.0.2 : Improve parsing speed by about a factor of 2 for large -// files(#105) -// version 1.0.1 : Fixes a shape is lost if obj ends with a 'usemtl'(#104) -// version 1.0.0 : Change data structure. Change license from BSD to MIT. 
-// - -// -// Use this in *one* .cc -// #define TINYOBJLOADER_IMPLEMENTATION -// #include "tiny_obj_loader.h" -// - -#ifndef TINY_OBJ_LOADER_H_ -#define TINY_OBJ_LOADER_H_ - -#include -#include -#include - -namespace tinyobj { - -// TODO(syoyo): Better C++11 detection for older compiler -#if __cplusplus > 199711L -#define TINYOBJ_OVERRIDE override -#else -#define TINYOBJ_OVERRIDE -#endif - -#ifdef __clang__ -#pragma clang diagnostic push -#if __has_warning("-Wzero-as-null-pointer-constant") -#pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" -#endif - -#pragma clang diagnostic ignored "-Wpadded" - -#endif - -// https://en.wikipedia.org/wiki/Wavefront_.obj_file says ... -// -// -blendu on | off # set horizontal texture blending -// (default on) -// -blendv on | off # set vertical texture blending -// (default on) -// -boost real_value # boost mip-map sharpness -// -mm base_value gain_value # modify texture map values (default -// 0 1) -// # base_value = brightness, -// gain_value = contrast -// -o u [v [w]] # Origin offset (default -// 0 0 0) -// -s u [v [w]] # Scale (default -// 1 1 1) -// -t u [v [w]] # Turbulence (default -// 0 0 0) -// -texres resolution # texture resolution to create -// -clamp on | off # only render texels in the clamped -// 0-1 range (default off) -// # When unclamped, textures are -// repeated across a surface, -// # when clamped, only texels which -// fall within the 0-1 -// # range are rendered. -// -bm mult_value # bump multiplier (for bump maps -// only) -// -// -imfchan r | g | b | m | l | z # specifies which channel of the file -// is used to -// # create a scalar or bump texture. -// r:red, g:green, -// # b:blue, m:matte, l:luminance, -// z:z-depth.. -// # (the default for bump is 'l' and -// for decal is 'm') -// bump -imfchan r bumpmap.tga # says to use the red channel of -// bumpmap.tga as the bumpmap -// -// For reflection maps... 
-// -// -type sphere # specifies a sphere for a "refl" -// reflection map -// -type cube_top | cube_bottom | # when using a cube map, the texture -// file for each -// cube_front | cube_back | # side of the cube is specified -// separately -// cube_left | cube_right -// -// TinyObjLoader extension. -// -// -colorspace SPACE # Color space of the texture. e.g. -// 'sRGB` or 'linear' -// - -#ifdef TINYOBJLOADER_USE_DOUBLE -//#pragma message "using double" -typedef double real_t; -#else -//#pragma message "using float" -typedef float real_t; -#endif - -typedef enum { - TEXTURE_TYPE_NONE, // default - TEXTURE_TYPE_SPHERE, - TEXTURE_TYPE_CUBE_TOP, - TEXTURE_TYPE_CUBE_BOTTOM, - TEXTURE_TYPE_CUBE_FRONT, - TEXTURE_TYPE_CUBE_BACK, - TEXTURE_TYPE_CUBE_LEFT, - TEXTURE_TYPE_CUBE_RIGHT -} texture_type_t; - -struct texture_option_t { - texture_type_t type; // -type (default TEXTURE_TYPE_NONE) - real_t sharpness; // -boost (default 1.0?) - real_t brightness; // base_value in -mm option (default 0) - real_t contrast; // gain_value in -mm option (default 1) - real_t origin_offset[3]; // -o u [v [w]] (default 0 0 0) - real_t scale[3]; // -s u [v [w]] (default 1 1 1) - real_t turbulence[3]; // -t u [v [w]] (default 0 0 0) - int texture_resolution; // -texres resolution (No default value in the spec. - // We'll use -1) - bool clamp; // -clamp (default false) - char imfchan; // -imfchan (the default for bump is 'l' and for decal is 'm') - bool blendu; // -blendu (default on) - bool blendv; // -blendv (default on) - real_t bump_multiplier; // -bm (for bump maps only, default 1.0) - - // extension - std::string colorspace; // Explicitly specify color space of stored texel - // value. Usually `sRGB` or `linear` (default empty). 
-}; - -struct material_t { - std::string name; - - real_t ambient[3]; - real_t diffuse[3]; - real_t specular[3]; - real_t transmittance[3]; - real_t emission[3]; - real_t shininess; - real_t ior; // index of refraction - real_t dissolve; // 1 == opaque; 0 == fully transparent - // illumination model (see http://www.fileformat.info/format/material/) - int illum; - - int dummy; // Suppress padding warning. - - std::string ambient_texname; // map_Ka - std::string diffuse_texname; // map_Kd - std::string specular_texname; // map_Ks - std::string specular_highlight_texname; // map_Ns - std::string bump_texname; // map_bump, map_Bump, bump - std::string displacement_texname; // disp - std::string alpha_texname; // map_d - std::string reflection_texname; // refl - - texture_option_t ambient_texopt; - texture_option_t diffuse_texopt; - texture_option_t specular_texopt; - texture_option_t specular_highlight_texopt; - texture_option_t bump_texopt; - texture_option_t displacement_texopt; - texture_option_t alpha_texopt; - texture_option_t reflection_texopt; - - // PBR extension - // http://exocortex.com/blog/extending_wavefront_mtl_to_support_pbr - real_t roughness; // [0, 1] default 0 - real_t metallic; // [0, 1] default 0 - real_t sheen; // [0, 1] default 0 - real_t clearcoat_thickness; // [0, 1] default 0 - real_t clearcoat_roughness; // [0, 1] default 0 - real_t anisotropy; // aniso. [0, 1] default 0 - real_t anisotropy_rotation; // anisor. [0, 1] default 0 - real_t pad0; - std::string roughness_texname; // map_Pr - std::string metallic_texname; // map_Pm - std::string sheen_texname; // map_Ps - std::string emissive_texname; // map_Ke - std::string normal_texname; // norm. For normal mapping. 
- - texture_option_t roughness_texopt; - texture_option_t metallic_texopt; - texture_option_t sheen_texopt; - texture_option_t emissive_texopt; - texture_option_t normal_texopt; - - int pad2; - - std::map unknown_parameter; - -#ifdef TINY_OBJ_LOADER_PYTHON_BINDING - // For pybind11 - std::array GetDiffuse() { - std::array values; - values[0] = double(diffuse[0]); - values[1] = double(diffuse[1]); - values[2] = double(diffuse[2]); - - return values; - } - - std::array GetSpecular() { - std::array values; - values[0] = double(specular[0]); - values[1] = double(specular[1]); - values[2] = double(specular[2]); - - return values; - } - - std::array GetTransmittance() { - std::array values; - values[0] = double(transmittance[0]); - values[1] = double(transmittance[1]); - values[2] = double(transmittance[2]); - - return values; - } - - std::array GetEmission() { - std::array values; - values[0] = double(emission[0]); - values[1] = double(emission[1]); - values[2] = double(emission[2]); - - return values; - } - - std::array GetAmbient() { - std::array values; - values[0] = double(ambient[0]); - values[1] = double(ambient[1]); - values[2] = double(ambient[2]); - - return values; - } - - void SetDiffuse(std::array &a) { - diffuse[0] = real_t(a[0]); - diffuse[1] = real_t(a[1]); - diffuse[2] = real_t(a[2]); - } - - void SetAmbient(std::array &a) { - ambient[0] = real_t(a[0]); - ambient[1] = real_t(a[1]); - ambient[2] = real_t(a[2]); - } - - void SetSpecular(std::array &a) { - specular[0] = real_t(a[0]); - specular[1] = real_t(a[1]); - specular[2] = real_t(a[2]); - } - - void SetTransmittance(std::array &a) { - transmittance[0] = real_t(a[0]); - transmittance[1] = real_t(a[1]); - transmittance[2] = real_t(a[2]); - } - - std::string GetCustomParameter(const std::string &key) { - std::map::const_iterator it = - unknown_parameter.find(key); - - if (it != unknown_parameter.end()) { - return it->second; - } - return std::string(); - } - -#endif -}; - -struct tag_t { - std::string 
name; - - std::vector intValues; - std::vector floatValues; - std::vector stringValues; -}; - -struct joint_and_weight_t { - int joint_id; - real_t weight; -}; - -struct skin_weight_t { - int vertex_id; // Corresponding vertex index in `attrib_t::vertices`. - // Compared to `index_t`, this index must be positive and - // start with 0(does not allow relative indexing) - std::vector weightValues; -}; - -// Index struct to support different indices for vtx/normal/texcoord. -// -1 means not used. -struct index_t { - int vertex_index; - int normal_index; - int texcoord_index; -}; - -struct mesh_t { - std::vector indices; - std::vector - num_face_vertices; // The number of vertices per - // face. 3 = triangle, 4 = quad, - // ... Up to 255 vertices per face. - std::vector material_ids; // per-face material ID - std::vector smoothing_group_ids; // per-face smoothing group - // ID(0 = off. positive value - // = group id) - std::vector tags; // SubD tag -}; - -// struct path_t { -// std::vector indices; // pairs of indices for lines -//}; - -struct lines_t { - // Linear flattened indices. - std::vector indices; // indices for vertices(poly lines) - std::vector num_line_vertices; // The number of vertices per line. -}; - -struct points_t { - std::vector indices; // indices for points -}; - -struct shape_t { - std::string name; - mesh_t mesh; - lines_t lines; - points_t points; -}; - -// Vertex attributes -struct attrib_t { - std::vector vertices; // 'v'(xyz) - - // For backward compatibility, we store vertex weight in separate array. - std::vector vertex_weights; // 'v'(w) - std::vector normals; // 'vn' - std::vector texcoords; // 'vt'(uv) - - // For backward compatibility, we store texture coordinate 'w' in separate - // array. - std::vector texcoord_ws; // 'vt'(w) - std::vector colors; // extension: vertex colors - - // - // TinyObj extension. - // - - // NOTE(syoyo): array index is based on the appearance order. 
- // To get a corresponding skin weight for a specific vertex id `vid`, - // Need to reconstruct a look up table: `skin_weight_t::vertex_id` == `vid` - // (e.g. using std::map, std::unordered_map) - std::vector skin_weights; - - attrib_t() {} - - // - // For pybind11 - // - const std::vector &GetVertices() const { return vertices; } - - const std::vector &GetVertexWeights() const { return vertex_weights; } -}; - -struct callback_t { - // W is optional and set to 1 if there is no `w` item in `v` line - void (*vertex_cb)(void *user_data, real_t x, real_t y, real_t z, real_t w); - void (*vertex_color_cb)(void *user_data, real_t x, real_t y, real_t z, - real_t r, real_t g, real_t b, bool has_color); - void (*normal_cb)(void *user_data, real_t x, real_t y, real_t z); - - // y and z are optional and set to 0 if there is no `y` and/or `z` item(s) in - // `vt` line. - void (*texcoord_cb)(void *user_data, real_t x, real_t y, real_t z); - - // called per 'f' line. num_indices is the number of face indices(e.g. 3 for - // triangle, 4 for quad) - // 0 will be passed for undefined index in index_t members. - void (*index_cb)(void *user_data, index_t *indices, int num_indices); - // `name` material name, `material_id` = the array index of material_t[]. -1 - // if - // a material not found in .mtl - void (*usemtl_cb)(void *user_data, const char *name, int material_id); - // `materials` = parsed material data. 
- void (*mtllib_cb)(void *user_data, const material_t *materials, - int num_materials); - // There may be multiple group names - void (*group_cb)(void *user_data, const char **names, int num_names); - void (*object_cb)(void *user_data, const char *name); - - callback_t() - : vertex_cb(NULL), - normal_cb(NULL), - texcoord_cb(NULL), - index_cb(NULL), - usemtl_cb(NULL), - mtllib_cb(NULL), - group_cb(NULL), - object_cb(NULL) {} -}; - -class MaterialReader { - public: - MaterialReader() {} - virtual ~MaterialReader(); - - virtual bool operator()(const std::string &matId, - std::vector *materials, - std::map *matMap, std::string *warn, - std::string *err) = 0; -}; - -/// -/// Read .mtl from a file. -/// -class MaterialFileReader : public MaterialReader { - public: - // Path could contain separator(';' in Windows, ':' in Posix) - explicit MaterialFileReader(const std::string &mtl_basedir) - : m_mtlBaseDir(mtl_basedir) {} - virtual ~MaterialFileReader() TINYOBJ_OVERRIDE {} - virtual bool operator()(const std::string &matId, - std::vector *materials, - std::map *matMap, std::string *warn, - std::string *err) TINYOBJ_OVERRIDE; - - private: - std::string m_mtlBaseDir; -}; - -/// -/// Read .mtl from a stream. -/// -class MaterialStreamReader : public MaterialReader { - public: - explicit MaterialStreamReader(std::istream &inStream) - : m_inStream(inStream) {} - virtual ~MaterialStreamReader() TINYOBJ_OVERRIDE {} - virtual bool operator()(const std::string &matId, - std::vector *materials, - std::map *matMap, std::string *warn, - std::string *err) TINYOBJ_OVERRIDE; - - private: - std::istream &m_inStream; -}; - -// v2 API -struct ObjReaderConfig { - bool triangulate; // triangulate polygon? - - // Currently not used. - // "simple" or empty: Create triangle fan - // "earcut": Use the algorithm based on Ear clipping - std::string triangulation_method; - - /// Parse vertex color. - /// If vertex color is not present, its filled with default value. 
- /// false = no vertex color - /// This will increase memory of parsed .obj - bool vertex_color; - - /// - /// Search path to .mtl file. - /// Default = "" = search from the same directory of .obj file. - /// Valid only when loading .obj from a file. - /// - std::string mtl_search_path; - - ObjReaderConfig() - : triangulate(true), triangulation_method("simple"), vertex_color(true) {} -}; - -/// -/// Wavefront .obj reader class(v2 API) -/// -class ObjReader { - public: - ObjReader() : valid_(false) {} - - /// - /// Load .obj and .mtl from a file. - /// - /// @param[in] filename wavefront .obj filename - /// @param[in] config Reader configuration - /// - bool ParseFromFile(const std::string &filename, - const ObjReaderConfig &config = ObjReaderConfig()); - - /// - /// Parse .obj from a text string. - /// Need to supply .mtl text string by `mtl_text`. - /// This function ignores `mtllib` line in .obj text. - /// - /// @param[in] obj_text wavefront .obj filename - /// @param[in] mtl_text wavefront .mtl filename - /// @param[in] config Reader configuration - /// - bool ParseFromString(const std::string &obj_text, const std::string &mtl_text, - const ObjReaderConfig &config = ObjReaderConfig()); - - /// - /// .obj was loaded or parsed correctly. 
- /// - bool Valid() const { return valid_; } - - const attrib_t &GetAttrib() const { return attrib_; } - - const std::vector &GetShapes() const { return shapes_; } - - const std::vector &GetMaterials() const { return materials_; } - - /// - /// Warning message(may be filled after `Load` or `Parse`) - /// - const std::string &Warning() const { return warning_; } - - /// - /// Error message(filled when `Load` or `Parse` failed) - /// - const std::string &Error() const { return error_; } - - private: - bool valid_; - - attrib_t attrib_; - std::vector shapes_; - std::vector materials_; - - std::string warning_; - std::string error_; -}; - -/// ==>>========= Legacy v1 API ============================================= - -/// Loads .obj from a file. -/// 'attrib', 'shapes' and 'materials' will be filled with parsed shape data -/// 'shapes' will be filled with parsed shape data -/// Returns true when loading .obj become success. -/// Returns warning message into `warn`, and error message into `err` -/// 'mtl_basedir' is optional, and used for base directory for .mtl file. -/// In default(`NULL'), .mtl file is searched from an application's working -/// directory. -/// 'triangulate' is optional, and used whether triangulate polygon face in .obj -/// or not. -/// Option 'default_vcols_fallback' specifies whether vertex colors should -/// always be defined, even if no colors are given (fallback to white). -bool LoadObj(attrib_t *attrib, std::vector *shapes, - std::vector *materials, std::string *warn, - std::string *err, const char *filename, - const char *mtl_basedir = NULL, bool triangulate = true, - bool default_vcols_fallback = true); - -/// Loads .obj from a file with custom user callback. -/// .mtl is loaded as usual and parsed material_t data will be passed to -/// `callback.mtllib_cb`. -/// Returns true when loading .obj/.mtl become success. 
-/// Returns warning message into `warn`, and error message into `err` -/// See `examples/callback_api/` for how to use this function. -bool LoadObjWithCallback(std::istream &inStream, const callback_t &callback, - void *user_data = NULL, - MaterialReader *readMatFn = NULL, - std::string *warn = NULL, std::string *err = NULL); - -/// Loads object from a std::istream, uses `readMatFn` to retrieve -/// std::istream for materials. -/// Returns true when loading .obj become success. -/// Returns warning and error message into `err` -bool LoadObj(attrib_t *attrib, std::vector *shapes, - std::vector *materials, std::string *warn, - std::string *err, std::istream *inStream, - MaterialReader *readMatFn = NULL, bool triangulate = true, - bool default_vcols_fallback = true); - -/// Loads materials into std::map -void LoadMtl(std::map *material_map, - std::vector *materials, std::istream *inStream, - std::string *warning, std::string *err); - -/// -/// Parse texture name and texture option for custom texture parameter through -/// material::unknown_parameter -/// -/// @param[out] texname Parsed texture name -/// @param[out] texopt Parsed texopt -/// @param[in] linebuf Input string -/// -bool ParseTextureNameAndOption(std::string *texname, texture_option_t *texopt, - const char *linebuf); - -/// =<<========== Legacy v1 API ============================================= - -} // namespace tinyobj - -#endif // TINY_OBJ_LOADER_H_ - -#ifdef TINYOBJLOADER_IMPLEMENTATION -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#ifdef TINYOBJLOADER_USE_MAPBOX_EARCUT - -#ifdef TINYOBJLOADER_DONOT_INCLUDE_MAPBOX_EARCUT -// Assume earcut.hpp is included outside of tiny_obj_loader.h -#else - -#ifdef __clang__ -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Weverything" -#endif - -#include -#include "mapbox/earcut.hpp" - -#ifdef __clang__ -#pragma clang diagnostic pop -#endif - -#endif - -#endif // 
TINYOBJLOADER_USE_MAPBOX_EARCUT - -namespace tinyobj { - -MaterialReader::~MaterialReader() {} - -struct vertex_index_t { - int v_idx, vt_idx, vn_idx; - vertex_index_t() : v_idx(-1), vt_idx(-1), vn_idx(-1) {} - explicit vertex_index_t(int idx) : v_idx(idx), vt_idx(idx), vn_idx(idx) {} - vertex_index_t(int vidx, int vtidx, int vnidx) - : v_idx(vidx), vt_idx(vtidx), vn_idx(vnidx) {} -}; - -// Internal data structure for face representation -// index + smoothing group. -struct face_t { - unsigned int - smoothing_group_id; // smoothing group id. 0 = smoothing groupd is off. - int pad_; - std::vector vertex_indices; // face vertex indices. - - face_t() : smoothing_group_id(0), pad_(0) {} -}; - -// Internal data structure for line representation -struct __line_t { - // l v1/vt1 v2/vt2 ... - // In the specification, line primitrive does not have normal index, but - // TinyObjLoader allow it - std::vector vertex_indices; -}; - -// Internal data structure for points representation -struct __points_t { - // p v1 v2 ... - // In the specification, point primitrive does not have normal index and - // texture coord index, but TinyObjLoader allow it. - std::vector vertex_indices; -}; - -struct tag_sizes { - tag_sizes() : num_ints(0), num_reals(0), num_strings(0) {} - int num_ints; - int num_reals; - int num_strings; -}; - -struct obj_shape { - std::vector v; - std::vector vn; - std::vector vt; -}; - -// -// Manages group of primitives(face, line, points, ...) -struct PrimGroup { - std::vector faceGroup; - std::vector<__line_t> lineGroup; - std::vector<__points_t> pointsGroup; - - void clear() { - faceGroup.clear(); - lineGroup.clear(); - pointsGroup.clear(); - } - - bool IsEmpty() const { - return faceGroup.empty() && lineGroup.empty() && pointsGroup.empty(); - } - - // TODO(syoyo): bspline, surface, ... 
-}; - -// See -// http://stackoverflow.com/questions/6089231/getting-std-ifstream-to-handle-lf-cr-and-crlf -static std::istream &safeGetline(std::istream &is, std::string &t) { - t.clear(); - - // The characters in the stream are read one-by-one using a std::streambuf. - // That is faster than reading them one-by-one using the std::istream. - // Code that uses streambuf this way must be guarded by a sentry object. - // The sentry object performs various tasks, - // such as thread synchronization and updating the stream state. - - std::istream::sentry se(is, true); - std::streambuf *sb = is.rdbuf(); - - if (se) { - for (;;) { - int c = sb->sbumpc(); - switch (c) { - case '\n': - return is; - case '\r': - if (sb->sgetc() == '\n') sb->sbumpc(); - return is; - case EOF: - // Also handle the case when the last line has no line ending - if (t.empty()) is.setstate(std::ios::eofbit); - return is; - default: - t += static_cast(c); - } - } - } - - return is; -} - -#define IS_SPACE(x) (((x) == ' ') || ((x) == '\t')) -#define IS_DIGIT(x) \ - (static_cast((x) - '0') < static_cast(10)) -#define IS_NEW_LINE(x) (((x) == '\r') || ((x) == '\n') || ((x) == '\0')) - -template -static inline std::string toString(const T &t) { - std::stringstream ss; - ss << t; - return ss.str(); -} - -struct warning_context -{ - std::string *warn; - size_t line_number; -}; - -// Make index zero-base, and also support relative index. -static inline bool fixIndex(int idx, int n, int *ret, bool allow_zero, const warning_context &context) { - if (!ret) { - return false; - } - - if (idx > 0) { - (*ret) = idx - 1; - return true; - } - - if (idx == 0) { - // zero is not allowed according to the spec. - if (context.warn) { - (*context.warn) += "A zero value index found (will have a value of -1 for normal and tex indices. 
Line " - + toString(context.line_number) + ").\n"; - } - - (*ret) = idx - 1; - return allow_zero; - } - - if (idx < 0) { - (*ret) = n + idx; // negative value = relative - return true; - } - - return false; // never reach here. -} - -static inline std::string parseString(const char **token) { - std::string s; - (*token) += strspn((*token), " \t"); - size_t e = strcspn((*token), " \t\r"); - s = std::string((*token), &(*token)[e]); - (*token) += e; - return s; -} - -static inline int parseInt(const char **token) { - (*token) += strspn((*token), " \t"); - int i = atoi((*token)); - (*token) += strcspn((*token), " \t\r"); - return i; -} - -// Tries to parse a floating point number located at s. -// -// s_end should be a location in the string where reading should absolutely -// stop. For example at the end of the string, to prevent buffer overflows. -// -// Parses the following EBNF grammar: -// sign = "+" | "-" ; -// END = ? anything not in digit ? -// digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" ; -// integer = [sign] , digit , {digit} ; -// decimal = integer , ["." , integer] ; -// float = ( decimal , END ) | ( decimal , ("E" | "e") , integer , END ) ; -// -// Valid strings are for example: -// -0 +3.1417e+2 -0.0E-3 1.0324 -1.41 11e2 -// -// If the parsing is a success, result is set to the parsed value and true -// is returned. -// -// The function is greedy and will parse until any of the following happens: -// - a non-conforming character is encountered. -// - s_end is reached. -// -// The following situations triggers a failure: -// - s >= s_end. -// - parse failure. -// -static bool tryParseDouble(const char *s, const char *s_end, double *result) { - if (s >= s_end) { - return false; - } - - double mantissa = 0.0; - // This exponent is base 2 rather than 10. 
- // However the exponent we parse is supposed to be one of ten, - // thus we must take care to convert the exponent/and or the - // mantissa to a * 2^E, where a is the mantissa and E is the - // exponent. - // To get the final double we will use ldexp, it requires the - // exponent to be in base 2. - int exponent = 0; - - // NOTE: THESE MUST BE DECLARED HERE SINCE WE ARE NOT ALLOWED - // TO JUMP OVER DEFINITIONS. - char sign = '+'; - char exp_sign = '+'; - char const *curr = s; - - // How many characters were read in a loop. - int read = 0; - // Tells whether a loop terminated due to reaching s_end. - bool end_not_reached = false; - bool leading_decimal_dots = false; - - /* - BEGIN PARSING. - */ - - // Find out what sign we've got. - if (*curr == '+' || *curr == '-') { - sign = *curr; - curr++; - if ((curr != s_end) && (*curr == '.')) { - // accept. Somethig like `.7e+2`, `-.5234` - leading_decimal_dots = true; - } - } else if (IS_DIGIT(*curr)) { /* Pass through. */ - } else if (*curr == '.') { - // accept. Somethig like `.7e+2`, `-.5234` - leading_decimal_dots = true; - } else { - goto fail; - } - - // Read the integer part. - end_not_reached = (curr != s_end); - if (!leading_decimal_dots) { - while (end_not_reached && IS_DIGIT(*curr)) { - mantissa *= 10; - mantissa += static_cast(*curr - 0x30); - curr++; - read++; - end_not_reached = (curr != s_end); - } - - // We must make sure we actually got something. - if (read == 0) goto fail; - } - - // We allow numbers of form "#", "###" etc. - if (!end_not_reached) goto assemble; - - // Read the decimal part. - if (*curr == '.') { - curr++; - read = 1; - end_not_reached = (curr != s_end); - while (end_not_reached && IS_DIGIT(*curr)) { - static const double pow_lut[] = { - 1.0, 0.1, 0.01, 0.001, 0.0001, 0.00001, 0.000001, 0.0000001, - }; - const int lut_entries = sizeof pow_lut / sizeof pow_lut[0]; - - // NOTE: Don't use powf here, it will absolutely murder precision. 
- mantissa += static_cast(*curr - 0x30) * - (read < lut_entries ? pow_lut[read] : std::pow(10.0, -read)); - read++; - curr++; - end_not_reached = (curr != s_end); - } - } else if (*curr == 'e' || *curr == 'E') { - } else { - goto assemble; - } - - if (!end_not_reached) goto assemble; - - // Read the exponent part. - if (*curr == 'e' || *curr == 'E') { - curr++; - // Figure out if a sign is present and if it is. - end_not_reached = (curr != s_end); - if (end_not_reached && (*curr == '+' || *curr == '-')) { - exp_sign = *curr; - curr++; - } else if (IS_DIGIT(*curr)) { /* Pass through. */ - } else { - // Empty E is not allowed. - goto fail; - } - - read = 0; - end_not_reached = (curr != s_end); - while (end_not_reached && IS_DIGIT(*curr)) { - // To avoid annoying MSVC's min/max macro definiton, - // Use hardcoded int max value - if (exponent > (2147483647/10)) { // 2147483647 = std::numeric_limits::max() - // Integer overflow - goto fail; - } - exponent *= 10; - exponent += static_cast(*curr - 0x30); - curr++; - read++; - end_not_reached = (curr != s_end); - } - exponent *= (exp_sign == '+' ? 1 : -1); - if (read == 0) goto fail; - } - -assemble: - *result = (sign == '+' ? 1 : -1) * - (exponent ? 
std::ldexp(mantissa * std::pow(5.0, exponent), exponent) - : mantissa); - return true; -fail: - return false; -} - -static inline real_t parseReal(const char **token, double default_value = 0.0) { - (*token) += strspn((*token), " \t"); - const char *end = (*token) + strcspn((*token), " \t\r"); - double val = default_value; - tryParseDouble((*token), end, &val); - real_t f = static_cast(val); - (*token) = end; - return f; -} - -static inline bool parseReal(const char **token, real_t *out) { - (*token) += strspn((*token), " \t"); - const char *end = (*token) + strcspn((*token), " \t\r"); - double val; - bool ret = tryParseDouble((*token), end, &val); - if (ret) { - real_t f = static_cast(val); - (*out) = f; - } - (*token) = end; - return ret; -} - -static inline void parseReal2(real_t *x, real_t *y, const char **token, - const double default_x = 0.0, - const double default_y = 0.0) { - (*x) = parseReal(token, default_x); - (*y) = parseReal(token, default_y); -} - -static inline void parseReal3(real_t *x, real_t *y, real_t *z, - const char **token, const double default_x = 0.0, - const double default_y = 0.0, - const double default_z = 0.0) { - (*x) = parseReal(token, default_x); - (*y) = parseReal(token, default_y); - (*z) = parseReal(token, default_z); -} - -static inline void parseV(real_t *x, real_t *y, real_t *z, real_t *w, - const char **token, const double default_x = 0.0, - const double default_y = 0.0, - const double default_z = 0.0, - const double default_w = 1.0) { - (*x) = parseReal(token, default_x); - (*y) = parseReal(token, default_y); - (*z) = parseReal(token, default_z); - (*w) = parseReal(token, default_w); -} - -// Extension: parse vertex with colors(6 items) -static inline bool parseVertexWithColor(real_t *x, real_t *y, real_t *z, - real_t *r, real_t *g, real_t *b, - const char **token, - const double default_x = 0.0, - const double default_y = 0.0, - const double default_z = 0.0) { - (*x) = parseReal(token, default_x); - (*y) = parseReal(token, 
default_y); - (*z) = parseReal(token, default_z); - - const bool found_color = - parseReal(token, r) && parseReal(token, g) && parseReal(token, b); - - if (!found_color) { - (*r) = (*g) = (*b) = 1.0; - } - - return found_color; -} - -static inline bool parseOnOff(const char **token, bool default_value = true) { - (*token) += strspn((*token), " \t"); - const char *end = (*token) + strcspn((*token), " \t\r"); - - bool ret = default_value; - if ((0 == strncmp((*token), "on", 2))) { - ret = true; - } else if ((0 == strncmp((*token), "off", 3))) { - ret = false; - } - - (*token) = end; - return ret; -} - -static inline texture_type_t parseTextureType( - const char **token, texture_type_t default_value = TEXTURE_TYPE_NONE) { - (*token) += strspn((*token), " \t"); - const char *end = (*token) + strcspn((*token), " \t\r"); - texture_type_t ty = default_value; - - if ((0 == strncmp((*token), "cube_top", strlen("cube_top")))) { - ty = TEXTURE_TYPE_CUBE_TOP; - } else if ((0 == strncmp((*token), "cube_bottom", strlen("cube_bottom")))) { - ty = TEXTURE_TYPE_CUBE_BOTTOM; - } else if ((0 == strncmp((*token), "cube_left", strlen("cube_left")))) { - ty = TEXTURE_TYPE_CUBE_LEFT; - } else if ((0 == strncmp((*token), "cube_right", strlen("cube_right")))) { - ty = TEXTURE_TYPE_CUBE_RIGHT; - } else if ((0 == strncmp((*token), "cube_front", strlen("cube_front")))) { - ty = TEXTURE_TYPE_CUBE_FRONT; - } else if ((0 == strncmp((*token), "cube_back", strlen("cube_back")))) { - ty = TEXTURE_TYPE_CUBE_BACK; - } else if ((0 == strncmp((*token), "sphere", strlen("sphere")))) { - ty = TEXTURE_TYPE_SPHERE; - } - - (*token) = end; - return ty; -} - -static tag_sizes parseTagTriple(const char **token) { - tag_sizes ts; - - (*token) += strspn((*token), " \t"); - ts.num_ints = atoi((*token)); - (*token) += strcspn((*token), "/ \t\r"); - if ((*token)[0] != '/') { - return ts; - } - - (*token)++; // Skip '/' - - (*token) += strspn((*token), " \t"); - ts.num_reals = atoi((*token)); - (*token) += 
strcspn((*token), "/ \t\r"); - if ((*token)[0] != '/') { - return ts; - } - (*token)++; // Skip '/' - - ts.num_strings = parseInt(token); - - return ts; -} - -// Parse triples with index offsets: i, i/j/k, i//k, i/j -static bool parseTriple(const char **token, int vsize, int vnsize, int vtsize, - vertex_index_t *ret, const warning_context &context) { - if (!ret) { - return false; - } - - vertex_index_t vi(-1); - - if (!fixIndex(atoi((*token)), vsize, &vi.v_idx, false, context)) { - return false; - } - - (*token) += strcspn((*token), "/ \t\r"); - if ((*token)[0] != '/') { - (*ret) = vi; - return true; - } - (*token)++; - - // i//k - if ((*token)[0] == '/') { - (*token)++; - if (!fixIndex(atoi((*token)), vnsize, &vi.vn_idx, true, context)) { - return false; - } - (*token) += strcspn((*token), "/ \t\r"); - (*ret) = vi; - return true; - } - - // i/j/k or i/j - if (!fixIndex(atoi((*token)), vtsize, &vi.vt_idx, true, context)) { - return false; - } - - (*token) += strcspn((*token), "/ \t\r"); - if ((*token)[0] != '/') { - (*ret) = vi; - return true; - } - - // i/j/k - (*token)++; // skip '/' - if (!fixIndex(atoi((*token)), vnsize, &vi.vn_idx, true, context)) { - return false; - } - (*token) += strcspn((*token), "/ \t\r"); - - (*ret) = vi; - - return true; -} - -// Parse raw triples: i, i/j/k, i//k, i/j -static vertex_index_t parseRawTriple(const char **token) { - vertex_index_t vi(static_cast(0)); // 0 is an invalid index in OBJ - - vi.v_idx = atoi((*token)); - (*token) += strcspn((*token), "/ \t\r"); - if ((*token)[0] != '/') { - return vi; - } - (*token)++; - - // i//k - if ((*token)[0] == '/') { - (*token)++; - vi.vn_idx = atoi((*token)); - (*token) += strcspn((*token), "/ \t\r"); - return vi; - } - - // i/j/k or i/j - vi.vt_idx = atoi((*token)); - (*token) += strcspn((*token), "/ \t\r"); - if ((*token)[0] != '/') { - return vi; - } - - // i/j/k - (*token)++; // skip '/' - vi.vn_idx = atoi((*token)); - (*token) += strcspn((*token), "/ \t\r"); - return vi; -} - -bool 
ParseTextureNameAndOption(std::string *texname, texture_option_t *texopt, - const char *linebuf) { - // @todo { write more robust lexer and parser. } - bool found_texname = false; - std::string texture_name; - - const char *token = linebuf; // Assume line ends with NULL - - while (!IS_NEW_LINE((*token))) { - token += strspn(token, " \t"); // skip space - if ((0 == strncmp(token, "-blendu", 7)) && IS_SPACE((token[7]))) { - token += 8; - texopt->blendu = parseOnOff(&token, /* default */ true); - } else if ((0 == strncmp(token, "-blendv", 7)) && IS_SPACE((token[7]))) { - token += 8; - texopt->blendv = parseOnOff(&token, /* default */ true); - } else if ((0 == strncmp(token, "-clamp", 6)) && IS_SPACE((token[6]))) { - token += 7; - texopt->clamp = parseOnOff(&token, /* default */ true); - } else if ((0 == strncmp(token, "-boost", 6)) && IS_SPACE((token[6]))) { - token += 7; - texopt->sharpness = parseReal(&token, 1.0); - } else if ((0 == strncmp(token, "-bm", 3)) && IS_SPACE((token[3]))) { - token += 4; - texopt->bump_multiplier = parseReal(&token, 1.0); - } else if ((0 == strncmp(token, "-o", 2)) && IS_SPACE((token[2]))) { - token += 3; - parseReal3(&(texopt->origin_offset[0]), &(texopt->origin_offset[1]), - &(texopt->origin_offset[2]), &token); - } else if ((0 == strncmp(token, "-s", 2)) && IS_SPACE((token[2]))) { - token += 3; - parseReal3(&(texopt->scale[0]), &(texopt->scale[1]), &(texopt->scale[2]), - &token, 1.0, 1.0, 1.0); - } else if ((0 == strncmp(token, "-t", 2)) && IS_SPACE((token[2]))) { - token += 3; - parseReal3(&(texopt->turbulence[0]), &(texopt->turbulence[1]), - &(texopt->turbulence[2]), &token); - } else if ((0 == strncmp(token, "-type", 5)) && IS_SPACE((token[5]))) { - token += 5; - texopt->type = parseTextureType((&token), TEXTURE_TYPE_NONE); - } else if ((0 == strncmp(token, "-texres", 7)) && IS_SPACE((token[7]))) { - token += 7; - // TODO(syoyo): Check if arg is int type. 
- texopt->texture_resolution = parseInt(&token); - } else if ((0 == strncmp(token, "-imfchan", 8)) && IS_SPACE((token[8]))) { - token += 9; - token += strspn(token, " \t"); - const char *end = token + strcspn(token, " \t\r"); - if ((end - token) == 1) { // Assume one char for -imfchan - texopt->imfchan = (*token); - } - token = end; - } else if ((0 == strncmp(token, "-mm", 3)) && IS_SPACE((token[3]))) { - token += 4; - parseReal2(&(texopt->brightness), &(texopt->contrast), &token, 0.0, 1.0); - } else if ((0 == strncmp(token, "-colorspace", 11)) && - IS_SPACE((token[11]))) { - token += 12; - texopt->colorspace = parseString(&token); - } else { -// Assume texture filename -#if 0 - size_t len = strcspn(token, " \t\r"); // untile next space - texture_name = std::string(token, token + len); - token += len; - - token += strspn(token, " \t"); // skip space -#else - // Read filename until line end to parse filename containing whitespace - // TODO(syoyo): Support parsing texture option flag after the filename. 
- texture_name = std::string(token); - token += texture_name.length(); -#endif - - found_texname = true; - } - } - - if (found_texname) { - (*texname) = texture_name; - return true; - } else { - return false; - } -} - -static void InitTexOpt(texture_option_t *texopt, const bool is_bump) { - if (is_bump) { - texopt->imfchan = 'l'; - } else { - texopt->imfchan = 'm'; - } - texopt->bump_multiplier = static_cast(1.0); - texopt->clamp = false; - texopt->blendu = true; - texopt->blendv = true; - texopt->sharpness = static_cast(1.0); - texopt->brightness = static_cast(0.0); - texopt->contrast = static_cast(1.0); - texopt->origin_offset[0] = static_cast(0.0); - texopt->origin_offset[1] = static_cast(0.0); - texopt->origin_offset[2] = static_cast(0.0); - texopt->scale[0] = static_cast(1.0); - texopt->scale[1] = static_cast(1.0); - texopt->scale[2] = static_cast(1.0); - texopt->turbulence[0] = static_cast(0.0); - texopt->turbulence[1] = static_cast(0.0); - texopt->turbulence[2] = static_cast(0.0); - texopt->texture_resolution = -1; - texopt->type = TEXTURE_TYPE_NONE; -} - -static void InitMaterial(material_t *material) { - InitTexOpt(&material->ambient_texopt, /* is_bump */ false); - InitTexOpt(&material->diffuse_texopt, /* is_bump */ false); - InitTexOpt(&material->specular_texopt, /* is_bump */ false); - InitTexOpt(&material->specular_highlight_texopt, /* is_bump */ false); - InitTexOpt(&material->bump_texopt, /* is_bump */ true); - InitTexOpt(&material->displacement_texopt, /* is_bump */ false); - InitTexOpt(&material->alpha_texopt, /* is_bump */ false); - InitTexOpt(&material->reflection_texopt, /* is_bump */ false); - InitTexOpt(&material->roughness_texopt, /* is_bump */ false); - InitTexOpt(&material->metallic_texopt, /* is_bump */ false); - InitTexOpt(&material->sheen_texopt, /* is_bump */ false); - InitTexOpt(&material->emissive_texopt, /* is_bump */ false); - InitTexOpt(&material->normal_texopt, - /* is_bump */ false); // @fixme { is_bump will be true? 
} - material->name = ""; - material->ambient_texname = ""; - material->diffuse_texname = ""; - material->specular_texname = ""; - material->specular_highlight_texname = ""; - material->bump_texname = ""; - material->displacement_texname = ""; - material->reflection_texname = ""; - material->alpha_texname = ""; - for (int i = 0; i < 3; i++) { - material->ambient[i] = static_cast(0.0); - material->diffuse[i] = static_cast(0.0); - material->specular[i] = static_cast(0.0); - material->transmittance[i] = static_cast(0.0); - material->emission[i] = static_cast(0.0); - } - material->illum = 0; - material->dissolve = static_cast(1.0); - material->shininess = static_cast(1.0); - material->ior = static_cast(1.0); - - material->roughness = static_cast(0.0); - material->metallic = static_cast(0.0); - material->sheen = static_cast(0.0); - material->clearcoat_thickness = static_cast(0.0); - material->clearcoat_roughness = static_cast(0.0); - material->anisotropy_rotation = static_cast(0.0); - material->anisotropy = static_cast(0.0); - material->roughness_texname = ""; - material->metallic_texname = ""; - material->sheen_texname = ""; - material->emissive_texname = ""; - material->normal_texname = ""; - - material->unknown_parameter.clear(); -} - -// code from https://wrf.ecse.rpi.edu//Research/Short_Notes/pnpoly.html -template -static int pnpoly(int nvert, T *vertx, T *verty, T testx, T testy) { - int i, j, c = 0; - for (i = 0, j = nvert - 1; i < nvert; j = i++) { - if (((verty[i] > testy) != (verty[j] > testy)) && - (testx < - (vertx[j] - vertx[i]) * (testy - verty[i]) / (verty[j] - verty[i]) + - vertx[i])) - c = !c; - } - return c; -} - -struct TinyObjPoint { - real_t x, y, z; - TinyObjPoint() : x(0), y(0), z(0) {} - TinyObjPoint(real_t x_, real_t y_, real_t z_) : - x(x_), y(y_), z(z_) {} -}; - -inline TinyObjPoint cross(const TinyObjPoint &v1, const TinyObjPoint &v2) { - return TinyObjPoint(v1.y * v2.z - v1.z * v2.y, - v1.z * v2.x - v1.x * v2.z, - v1.x * v2.y - v1.y * v2.x); 
-} - -inline real_t dot(const TinyObjPoint &v1, const TinyObjPoint &v2) { - return (v1.x * v2.x + v1.y * v2.y + v1.z * v2.z); -} - -inline real_t GetLength(TinyObjPoint &e) { - return std::sqrt(e.x*e.x + e.y*e.y + e.z*e.z); -} - -inline TinyObjPoint Normalize(TinyObjPoint e) { - real_t inv_length = real_t(1) / GetLength(e); - return TinyObjPoint(e.x * inv_length, e.y * inv_length, e.z * inv_length ); -} - - -inline TinyObjPoint WorldToLocal(const TinyObjPoint& a, - const TinyObjPoint& u, - const TinyObjPoint& v, - const TinyObjPoint& w) { - return TinyObjPoint(dot(a,u),dot(a,v),dot(a,w)); -} - - -// TODO(syoyo): refactor function. -static bool exportGroupsToShape(shape_t *shape, const PrimGroup &prim_group, - const std::vector &tags, - const int material_id, const std::string &name, - bool triangulate, const std::vector &v, - std::string *warn) { - if (prim_group.IsEmpty()) { - return false; - } - - shape->name = name; - - // polygon - if (!prim_group.faceGroup.empty()) { - // Flatten vertices and indices - for (size_t i = 0; i < prim_group.faceGroup.size(); i++) { - const face_t &face = prim_group.faceGroup[i]; - - size_t npolys = face.vertex_indices.size(); - - if (npolys < 3) { - // Face must have 3+ vertices. - if (warn) { - (*warn) += "Degenerated face found\n."; - } - continue; - } - - if (triangulate && npolys != 3) { - if (npolys == 4) { - vertex_index_t i0 = face.vertex_indices[0]; - vertex_index_t i1 = face.vertex_indices[1]; - vertex_index_t i2 = face.vertex_indices[2]; - vertex_index_t i3 = face.vertex_indices[3]; - - size_t vi0 = size_t(i0.v_idx); - size_t vi1 = size_t(i1.v_idx); - size_t vi2 = size_t(i2.v_idx); - size_t vi3 = size_t(i3.v_idx); - - if (((3 * vi0 + 2) >= v.size()) || ((3 * vi1 + 2) >= v.size()) || - ((3 * vi2 + 2) >= v.size()) || ((3 * vi3 + 2) >= v.size())) { - // Invalid triangle. - // FIXME(syoyo): Is it ok to simply skip this invalid triangle? 
- if (warn) { - (*warn) += "Face with invalid vertex index found.\n"; - } - continue; - } - - real_t v0x = v[vi0 * 3 + 0]; - real_t v0y = v[vi0 * 3 + 1]; - real_t v0z = v[vi0 * 3 + 2]; - real_t v1x = v[vi1 * 3 + 0]; - real_t v1y = v[vi1 * 3 + 1]; - real_t v1z = v[vi1 * 3 + 2]; - real_t v2x = v[vi2 * 3 + 0]; - real_t v2y = v[vi2 * 3 + 1]; - real_t v2z = v[vi2 * 3 + 2]; - real_t v3x = v[vi3 * 3 + 0]; - real_t v3y = v[vi3 * 3 + 1]; - real_t v3z = v[vi3 * 3 + 2]; - - // There are two candidates to split the quad into two triangles. - // - // Choose the shortest edge. - // TODO: Is it better to determine the edge to split by calculating - // the area of each triangle? - // - // +---+ - // |\ | - // | \ | - // | \| - // +---+ - // - // +---+ - // | /| - // | / | - // |/ | - // +---+ - - real_t e02x = v2x - v0x; - real_t e02y = v2y - v0y; - real_t e02z = v2z - v0z; - real_t e13x = v3x - v1x; - real_t e13y = v3y - v1y; - real_t e13z = v3z - v1z; - - real_t sqr02 = e02x * e02x + e02y * e02y + e02z * e02z; - real_t sqr13 = e13x * e13x + e13y * e13y + e13z * e13z; - - index_t idx0, idx1, idx2, idx3; - - idx0.vertex_index = i0.v_idx; - idx0.normal_index = i0.vn_idx; - idx0.texcoord_index = i0.vt_idx; - idx1.vertex_index = i1.v_idx; - idx1.normal_index = i1.vn_idx; - idx1.texcoord_index = i1.vt_idx; - idx2.vertex_index = i2.v_idx; - idx2.normal_index = i2.vn_idx; - idx2.texcoord_index = i2.vt_idx; - idx3.vertex_index = i3.v_idx; - idx3.normal_index = i3.vn_idx; - idx3.texcoord_index = i3.vt_idx; - - if (sqr02 < sqr13) { - // [0, 1, 2], [0, 2, 3] - shape->mesh.indices.push_back(idx0); - shape->mesh.indices.push_back(idx1); - shape->mesh.indices.push_back(idx2); - - shape->mesh.indices.push_back(idx0); - shape->mesh.indices.push_back(idx2); - shape->mesh.indices.push_back(idx3); - } else { - // [0, 1, 3], [1, 2, 3] - shape->mesh.indices.push_back(idx0); - shape->mesh.indices.push_back(idx1); - shape->mesh.indices.push_back(idx3); - - shape->mesh.indices.push_back(idx1); - 
shape->mesh.indices.push_back(idx2); - shape->mesh.indices.push_back(idx3); - } - - // Two triangle faces - shape->mesh.num_face_vertices.push_back(3); - shape->mesh.num_face_vertices.push_back(3); - - shape->mesh.material_ids.push_back(material_id); - shape->mesh.material_ids.push_back(material_id); - - shape->mesh.smoothing_group_ids.push_back(face.smoothing_group_id); - shape->mesh.smoothing_group_ids.push_back(face.smoothing_group_id); - - } else { -#ifdef TINYOBJLOADER_USE_MAPBOX_EARCUT - vertex_index_t i0 = face.vertex_indices[0]; - vertex_index_t i0_2 = i0; - - // TMW change: Find the normal axis of the polygon using Newell's method - TinyObjPoint n; - for (size_t k = 0; k < npolys; ++k) { - i0 = face.vertex_indices[k % npolys]; - size_t vi0 = size_t(i0.v_idx); - - size_t j = (k + 1) % npolys; - i0_2 = face.vertex_indices[j]; - size_t vi0_2 = size_t(i0_2.v_idx); - - real_t v0x = v[vi0 * 3 + 0]; - real_t v0y = v[vi0 * 3 + 1]; - real_t v0z = v[vi0 * 3 + 2]; - - real_t v0x_2 = v[vi0_2 * 3 + 0]; - real_t v0y_2 = v[vi0_2 * 3 + 1]; - real_t v0z_2 = v[vi0_2 * 3 + 2]; - - const TinyObjPoint point1(v0x,v0y,v0z); - const TinyObjPoint point2(v0x_2,v0y_2,v0z_2); - - TinyObjPoint a(point1.x - point2.x, point1.y - point2.y, point1.z - point2.z); - TinyObjPoint b(point1.x + point2.x, point1.y + point2.y, point1.z + point2.z); - - n.x += (a.y * b.z); - n.y += (a.z * b.x); - n.z += (a.x * b.y); - } - real_t length_n = GetLength(n); - //Check if zero length normal - if(length_n <= 0) { - continue; - } - //Negative is to flip the normal to the correct direction - real_t inv_length = -real_t(1.0) / length_n; - n.x *= inv_length; - n.y *= inv_length; - n.z *= inv_length; - - TinyObjPoint axis_w, axis_v, axis_u; - axis_w = n; - TinyObjPoint a; - if(std::abs(axis_w.x) > real_t(0.9999999)) { - a = TinyObjPoint(0,1,0); - } else { - a = TinyObjPoint(1,0,0); - } - axis_v = Normalize(cross(axis_w, a)); - axis_u = cross(axis_w, axis_v); - using Point = std::array; - - // first polyline 
define the main polygon. - // following polylines define holes(not used in tinyobj). - std::vector > polygon; - - std::vector polyline; - - //TMW change: Find best normal and project v0x and v0y to those coordinates, instead of - //picking a plane aligned with an axis (which can flip polygons). - - // Fill polygon data(facevarying vertices). - for (size_t k = 0; k < npolys; k++) { - i0 = face.vertex_indices[k]; - size_t vi0 = size_t(i0.v_idx); - - assert(((3 * vi0 + 2) < v.size())); - - real_t v0x = v[vi0 * 3 + 0]; - real_t v0y = v[vi0 * 3 + 1]; - real_t v0z = v[vi0 * 3 + 2]; - - TinyObjPoint polypoint(v0x,v0y,v0z); - TinyObjPoint loc = WorldToLocal(polypoint, axis_u, axis_v, axis_w); - - polyline.push_back({loc.x, loc.y}); - } - - polygon.push_back(polyline); - std::vector indices = mapbox::earcut(polygon); - // => result = 3 * faces, clockwise - - assert(indices.size() % 3 == 0); - - // Reconstruct vertex_index_t - for (size_t k = 0; k < indices.size() / 3; k++) { - { - index_t idx0, idx1, idx2; - idx0.vertex_index = face.vertex_indices[indices[3 * k + 0]].v_idx; - idx0.normal_index = - face.vertex_indices[indices[3 * k + 0]].vn_idx; - idx0.texcoord_index = - face.vertex_indices[indices[3 * k + 0]].vt_idx; - idx1.vertex_index = face.vertex_indices[indices[3 * k + 1]].v_idx; - idx1.normal_index = - face.vertex_indices[indices[3 * k + 1]].vn_idx; - idx1.texcoord_index = - face.vertex_indices[indices[3 * k + 1]].vt_idx; - idx2.vertex_index = face.vertex_indices[indices[3 * k + 2]].v_idx; - idx2.normal_index = - face.vertex_indices[indices[3 * k + 2]].vn_idx; - idx2.texcoord_index = - face.vertex_indices[indices[3 * k + 2]].vt_idx; - - shape->mesh.indices.push_back(idx0); - shape->mesh.indices.push_back(idx1); - shape->mesh.indices.push_back(idx2); - - shape->mesh.num_face_vertices.push_back(3); - shape->mesh.material_ids.push_back(material_id); - shape->mesh.smoothing_group_ids.push_back( - face.smoothing_group_id); - } - } - -#else // Built-in ear clipping 
triangulation - vertex_index_t i0 = face.vertex_indices[0]; - vertex_index_t i1(-1); - vertex_index_t i2 = face.vertex_indices[1]; - - // find the two axes to work in - size_t axes[2] = {1, 2}; - for (size_t k = 0; k < npolys; ++k) { - i0 = face.vertex_indices[(k + 0) % npolys]; - i1 = face.vertex_indices[(k + 1) % npolys]; - i2 = face.vertex_indices[(k + 2) % npolys]; - size_t vi0 = size_t(i0.v_idx); - size_t vi1 = size_t(i1.v_idx); - size_t vi2 = size_t(i2.v_idx); - - if (((3 * vi0 + 2) >= v.size()) || ((3 * vi1 + 2) >= v.size()) || - ((3 * vi2 + 2) >= v.size())) { - // Invalid triangle. - // FIXME(syoyo): Is it ok to simply skip this invalid triangle? - continue; - } - real_t v0x = v[vi0 * 3 + 0]; - real_t v0y = v[vi0 * 3 + 1]; - real_t v0z = v[vi0 * 3 + 2]; - real_t v1x = v[vi1 * 3 + 0]; - real_t v1y = v[vi1 * 3 + 1]; - real_t v1z = v[vi1 * 3 + 2]; - real_t v2x = v[vi2 * 3 + 0]; - real_t v2y = v[vi2 * 3 + 1]; - real_t v2z = v[vi2 * 3 + 2]; - real_t e0x = v1x - v0x; - real_t e0y = v1y - v0y; - real_t e0z = v1z - v0z; - real_t e1x = v2x - v1x; - real_t e1y = v2y - v1y; - real_t e1z = v2z - v1z; - real_t cx = std::fabs(e0y * e1z - e0z * e1y); - real_t cy = std::fabs(e0z * e1x - e0x * e1z); - real_t cz = std::fabs(e0x * e1y - e0y * e1x); - const real_t epsilon = std::numeric_limits::epsilon(); - // std::cout << "cx " << cx << ", cy " << cy << ", cz " << cz << - // "\n"; - if (cx > epsilon || cy > epsilon || cz > epsilon) { - // std::cout << "corner\n"; - // found a corner - if (cx > cy && cx > cz) { - // std::cout << "pattern0\n"; - } else { - // std::cout << "axes[0] = 0\n"; - axes[0] = 0; - if (cz > cx && cz > cy) { - // std::cout << "axes[1] = 1\n"; - axes[1] = 1; - } - } - break; - } - } - - face_t remainingFace = face; // copy - size_t guess_vert = 0; - vertex_index_t ind[3]; - real_t vx[3]; - real_t vy[3]; - - // How many iterations can we do without decreasing the remaining - // vertices. 
- size_t remainingIterations = face.vertex_indices.size(); - size_t previousRemainingVertices = - remainingFace.vertex_indices.size(); - - while (remainingFace.vertex_indices.size() > 3 && - remainingIterations > 0) { - // std::cout << "remainingIterations " << remainingIterations << - // "\n"; - - npolys = remainingFace.vertex_indices.size(); - if (guess_vert >= npolys) { - guess_vert -= npolys; - } - - if (previousRemainingVertices != npolys) { - // The number of remaining vertices decreased. Reset counters. - previousRemainingVertices = npolys; - remainingIterations = npolys; - } else { - // We didn't consume a vertex on previous iteration, reduce the - // available iterations. - remainingIterations--; - } - - for (size_t k = 0; k < 3; k++) { - ind[k] = remainingFace.vertex_indices[(guess_vert + k) % npolys]; - size_t vi = size_t(ind[k].v_idx); - if (((vi * 3 + axes[0]) >= v.size()) || - ((vi * 3 + axes[1]) >= v.size())) { - // ??? - vx[k] = static_cast(0.0); - vy[k] = static_cast(0.0); - } else { - vx[k] = v[vi * 3 + axes[0]]; - vy[k] = v[vi * 3 + axes[1]]; - } - } - - // - // area is calculated per face - // - real_t e0x = vx[1] - vx[0]; - real_t e0y = vy[1] - vy[0]; - real_t e1x = vx[2] - vx[1]; - real_t e1y = vy[2] - vy[1]; - real_t cross = e0x * e1y - e0y * e1x; - // std::cout << "axes = " << axes[0] << ", " << axes[1] << "\n"; - // std::cout << "e0x, e0y, e1x, e1y " << e0x << ", " << e0y << ", " - // << e1x << ", " << e1y << "\n"; - - real_t area = (vx[0] * vy[1] - vy[0] * vx[1]) * static_cast(0.5); - // std::cout << "cross " << cross << ", area " << area << "\n"; - // if an internal angle - if (cross * area < static_cast(0.0)) { - // std::cout << "internal \n"; - guess_vert += 1; - // std::cout << "guess vert : " << guess_vert << "\n"; - continue; - } - - // check all other verts in case they are inside this triangle - bool overlap = false; - for (size_t otherVert = 3; otherVert < npolys; ++otherVert) { - size_t idx = (guess_vert + otherVert) % npolys; - 
- if (idx >= remainingFace.vertex_indices.size()) { - // std::cout << "???0\n"; - // ??? - continue; - } - - size_t ovi = size_t(remainingFace.vertex_indices[idx].v_idx); - - if (((ovi * 3 + axes[0]) >= v.size()) || - ((ovi * 3 + axes[1]) >= v.size())) { - // std::cout << "???1\n"; - // ??? - continue; - } - real_t tx = v[ovi * 3 + axes[0]]; - real_t ty = v[ovi * 3 + axes[1]]; - if (pnpoly(3, vx, vy, tx, ty)) { - // std::cout << "overlap\n"; - overlap = true; - break; - } - } - - if (overlap) { - // std::cout << "overlap2\n"; - guess_vert += 1; - continue; - } - - // this triangle is an ear - { - index_t idx0, idx1, idx2; - idx0.vertex_index = ind[0].v_idx; - idx0.normal_index = ind[0].vn_idx; - idx0.texcoord_index = ind[0].vt_idx; - idx1.vertex_index = ind[1].v_idx; - idx1.normal_index = ind[1].vn_idx; - idx1.texcoord_index = ind[1].vt_idx; - idx2.vertex_index = ind[2].v_idx; - idx2.normal_index = ind[2].vn_idx; - idx2.texcoord_index = ind[2].vt_idx; - - shape->mesh.indices.push_back(idx0); - shape->mesh.indices.push_back(idx1); - shape->mesh.indices.push_back(idx2); - - shape->mesh.num_face_vertices.push_back(3); - shape->mesh.material_ids.push_back(material_id); - shape->mesh.smoothing_group_ids.push_back( - face.smoothing_group_id); - } - - // remove v1 from the list - size_t removed_vert_index = (guess_vert + 1) % npolys; - while (removed_vert_index + 1 < npolys) { - remainingFace.vertex_indices[removed_vert_index] = - remainingFace.vertex_indices[removed_vert_index + 1]; - removed_vert_index += 1; - } - remainingFace.vertex_indices.pop_back(); - } - - // std::cout << "remainingFace.vi.size = " << - // remainingFace.vertex_indices.size() << "\n"; - if (remainingFace.vertex_indices.size() == 3) { - i0 = remainingFace.vertex_indices[0]; - i1 = remainingFace.vertex_indices[1]; - i2 = remainingFace.vertex_indices[2]; - { - index_t idx0, idx1, idx2; - idx0.vertex_index = i0.v_idx; - idx0.normal_index = i0.vn_idx; - idx0.texcoord_index = i0.vt_idx; - 
idx1.vertex_index = i1.v_idx; - idx1.normal_index = i1.vn_idx; - idx1.texcoord_index = i1.vt_idx; - idx2.vertex_index = i2.v_idx; - idx2.normal_index = i2.vn_idx; - idx2.texcoord_index = i2.vt_idx; - - shape->mesh.indices.push_back(idx0); - shape->mesh.indices.push_back(idx1); - shape->mesh.indices.push_back(idx2); - - shape->mesh.num_face_vertices.push_back(3); - shape->mesh.material_ids.push_back(material_id); - shape->mesh.smoothing_group_ids.push_back( - face.smoothing_group_id); - } - } -#endif - } // npolys - } else { - for (size_t k = 0; k < npolys; k++) { - index_t idx; - idx.vertex_index = face.vertex_indices[k].v_idx; - idx.normal_index = face.vertex_indices[k].vn_idx; - idx.texcoord_index = face.vertex_indices[k].vt_idx; - shape->mesh.indices.push_back(idx); - } - - shape->mesh.num_face_vertices.push_back( - static_cast(npolys)); - shape->mesh.material_ids.push_back(material_id); // per face - shape->mesh.smoothing_group_ids.push_back( - face.smoothing_group_id); // per face - } - } - - shape->mesh.tags = tags; - } - - // line - if (!prim_group.lineGroup.empty()) { - // Flatten indices - for (size_t i = 0; i < prim_group.lineGroup.size(); i++) { - for (size_t j = 0; j < prim_group.lineGroup[i].vertex_indices.size(); - j++) { - const vertex_index_t &vi = prim_group.lineGroup[i].vertex_indices[j]; - - index_t idx; - idx.vertex_index = vi.v_idx; - idx.normal_index = vi.vn_idx; - idx.texcoord_index = vi.vt_idx; - - shape->lines.indices.push_back(idx); - } - - shape->lines.num_line_vertices.push_back( - int(prim_group.lineGroup[i].vertex_indices.size())); - } - } - - // points - if (!prim_group.pointsGroup.empty()) { - // Flatten & convert indices - for (size_t i = 0; i < prim_group.pointsGroup.size(); i++) { - for (size_t j = 0; j < prim_group.pointsGroup[i].vertex_indices.size(); - j++) { - const vertex_index_t &vi = prim_group.pointsGroup[i].vertex_indices[j]; - - index_t idx; - idx.vertex_index = vi.v_idx; - idx.normal_index = vi.vn_idx; - 
idx.texcoord_index = vi.vt_idx; - - shape->points.indices.push_back(idx); - } - } - } - - return true; -} - -// Split a string with specified delimiter character and escape character. -// https://rosettacode.org/wiki/Tokenize_a_string_with_escaping#C.2B.2B -static void SplitString(const std::string &s, char delim, char escape, - std::vector &elems) { - std::string token; - - bool escaping = false; - for (size_t i = 0; i < s.size(); ++i) { - char ch = s[i]; - if (escaping) { - escaping = false; - } else if (ch == escape) { - escaping = true; - continue; - } else if (ch == delim) { - if (!token.empty()) { - elems.push_back(token); - } - token.clear(); - continue; - } - token += ch; - } - - elems.push_back(token); -} - -static std::string JoinPath(const std::string &dir, - const std::string &filename) { - if (dir.empty()) { - return filename; - } else { - // check '/' - char lastChar = *dir.rbegin(); - if (lastChar != '/') { - return dir + std::string("/") + filename; - } else { - return dir + filename; - } - } -} - -void LoadMtl(std::map *material_map, - std::vector *materials, std::istream *inStream, - std::string *warning, std::string *err) { - (void)err; - - // Create a default material anyway. - material_t material; - InitMaterial(&material); - - // Issue 43. `d` wins against `Tr` since `Tr` is not in the MTL specification. - bool has_d = false; - bool has_tr = false; - - // has_kd is used to set a default diffuse value when map_Kd is present - // and Kd is not. - bool has_kd = false; - - std::stringstream warn_ss; - - size_t line_no = 0; - std::string linebuf; - while (inStream->peek() != -1) { - safeGetline(*inStream, linebuf); - line_no++; - - // Trim trailing whitespace. 
- if (linebuf.size() > 0) { - linebuf = linebuf.substr(0, linebuf.find_last_not_of(" \t") + 1); - } - - // Trim newline '\r\n' or '\n' - if (linebuf.size() > 0) { - if (linebuf[linebuf.size() - 1] == '\n') - linebuf.erase(linebuf.size() - 1); - } - if (linebuf.size() > 0) { - if (linebuf[linebuf.size() - 1] == '\r') - linebuf.erase(linebuf.size() - 1); - } - - // Skip if empty line. - if (linebuf.empty()) { - continue; - } - - // Skip leading space. - const char *token = linebuf.c_str(); - token += strspn(token, " \t"); - - assert(token); - if (token[0] == '\0') continue; // empty line - - if (token[0] == '#') continue; // comment line - - // new mtl - if ((0 == strncmp(token, "newmtl", 6)) && IS_SPACE((token[6]))) { - // flush previous material. - if (!material.name.empty()) { - material_map->insert(std::pair( - material.name, static_cast(materials->size()))); - materials->push_back(material); - } - - // initial temporary material - InitMaterial(&material); - - has_d = false; - has_tr = false; - - // set new mtl name - token += 7; - { - std::string namebuf = parseString(&token); - // TODO: empty name check? 
- if (namebuf.empty()) { - if (warning) { - (*warning) += "empty material name in `newmtl`\n"; - } - } - material.name = namebuf; - } - continue; - } - - // ambient - if (token[0] == 'K' && token[1] == 'a' && IS_SPACE((token[2]))) { - token += 2; - real_t r, g, b; - parseReal3(&r, &g, &b, &token); - material.ambient[0] = r; - material.ambient[1] = g; - material.ambient[2] = b; - continue; - } - - // diffuse - if (token[0] == 'K' && token[1] == 'd' && IS_SPACE((token[2]))) { - token += 2; - real_t r, g, b; - parseReal3(&r, &g, &b, &token); - material.diffuse[0] = r; - material.diffuse[1] = g; - material.diffuse[2] = b; - has_kd = true; - continue; - } - - // specular - if (token[0] == 'K' && token[1] == 's' && IS_SPACE((token[2]))) { - token += 2; - real_t r, g, b; - parseReal3(&r, &g, &b, &token); - material.specular[0] = r; - material.specular[1] = g; - material.specular[2] = b; - continue; - } - - // transmittance - if ((token[0] == 'K' && token[1] == 't' && IS_SPACE((token[2]))) || - (token[0] == 'T' && token[1] == 'f' && IS_SPACE((token[2])))) { - token += 2; - real_t r, g, b; - parseReal3(&r, &g, &b, &token); - material.transmittance[0] = r; - material.transmittance[1] = g; - material.transmittance[2] = b; - continue; - } - - // ior(index of refraction) - if (token[0] == 'N' && token[1] == 'i' && IS_SPACE((token[2]))) { - token += 2; - material.ior = parseReal(&token); - continue; - } - - // emission - if (token[0] == 'K' && token[1] == 'e' && IS_SPACE(token[2])) { - token += 2; - real_t r, g, b; - parseReal3(&r, &g, &b, &token); - material.emission[0] = r; - material.emission[1] = g; - material.emission[2] = b; - continue; - } - - // shininess - if (token[0] == 'N' && token[1] == 's' && IS_SPACE(token[2])) { - token += 2; - material.shininess = parseReal(&token); - continue; - } - - // illum model - if (0 == strncmp(token, "illum", 5) && IS_SPACE(token[5])) { - token += 6; - material.illum = parseInt(&token); - continue; - } - - // dissolve - if ((token[0] == 
'd' && IS_SPACE(token[1]))) { - token += 1; - material.dissolve = parseReal(&token); - - if (has_tr) { - warn_ss << "Both `d` and `Tr` parameters defined for \"" - << material.name - << "\". Use the value of `d` for dissolve (line " << line_no - << " in .mtl.)\n"; - } - has_d = true; - continue; - } - if (token[0] == 'T' && token[1] == 'r' && IS_SPACE(token[2])) { - token += 2; - if (has_d) { - // `d` wins. Ignore `Tr` value. - warn_ss << "Both `d` and `Tr` parameters defined for \"" - << material.name - << "\". Use the value of `d` for dissolve (line " << line_no - << " in .mtl.)\n"; - } else { - // We invert value of Tr(assume Tr is in range [0, 1]) - // NOTE: Interpretation of Tr is application(exporter) dependent. For - // some application(e.g. 3ds max obj exporter), Tr = d(Issue 43) - material.dissolve = static_cast(1.0) - parseReal(&token); - } - has_tr = true; - continue; - } - - // PBR: roughness - if (token[0] == 'P' && token[1] == 'r' && IS_SPACE(token[2])) { - token += 2; - material.roughness = parseReal(&token); - continue; - } - - // PBR: metallic - if (token[0] == 'P' && token[1] == 'm' && IS_SPACE(token[2])) { - token += 2; - material.metallic = parseReal(&token); - continue; - } - - // PBR: sheen - if (token[0] == 'P' && token[1] == 's' && IS_SPACE(token[2])) { - token += 2; - material.sheen = parseReal(&token); - continue; - } - - // PBR: clearcoat thickness - if (token[0] == 'P' && token[1] == 'c' && IS_SPACE(token[2])) { - token += 2; - material.clearcoat_thickness = parseReal(&token); - continue; - } - - // PBR: clearcoat roughness - if ((0 == strncmp(token, "Pcr", 3)) && IS_SPACE(token[3])) { - token += 4; - material.clearcoat_roughness = parseReal(&token); - continue; - } - - // PBR: anisotropy - if ((0 == strncmp(token, "aniso", 5)) && IS_SPACE(token[5])) { - token += 6; - material.anisotropy = parseReal(&token); - continue; - } - - // PBR: anisotropy rotation - if ((0 == strncmp(token, "anisor", 6)) && IS_SPACE(token[6])) { - token += 7; - 
material.anisotropy_rotation = parseReal(&token); - continue; - } - - // ambient texture - if ((0 == strncmp(token, "map_Ka", 6)) && IS_SPACE(token[6])) { - token += 7; - ParseTextureNameAndOption(&(material.ambient_texname), - &(material.ambient_texopt), token); - continue; - } - - // diffuse texture - if ((0 == strncmp(token, "map_Kd", 6)) && IS_SPACE(token[6])) { - token += 7; - ParseTextureNameAndOption(&(material.diffuse_texname), - &(material.diffuse_texopt), token); - - // Set a decent diffuse default value if a diffuse texture is specified - // without a matching Kd value. - if (!has_kd) { - material.diffuse[0] = static_cast(0.6); - material.diffuse[1] = static_cast(0.6); - material.diffuse[2] = static_cast(0.6); - } - - continue; - } - - // specular texture - if ((0 == strncmp(token, "map_Ks", 6)) && IS_SPACE(token[6])) { - token += 7; - ParseTextureNameAndOption(&(material.specular_texname), - &(material.specular_texopt), token); - continue; - } - - // specular highlight texture - if ((0 == strncmp(token, "map_Ns", 6)) && IS_SPACE(token[6])) { - token += 7; - ParseTextureNameAndOption(&(material.specular_highlight_texname), - &(material.specular_highlight_texopt), token); - continue; - } - - // bump texture - if (((0 == strncmp(token, "map_bump", 8)) || - (0 == strncmp(token, "map_Bump", 8))) && - IS_SPACE(token[8])) { - token += 9; - ParseTextureNameAndOption(&(material.bump_texname), - &(material.bump_texopt), token); - continue; - } - - // bump texture - if ((0 == strncmp(token, "bump", 4)) && IS_SPACE(token[4])) { - token += 5; - ParseTextureNameAndOption(&(material.bump_texname), - &(material.bump_texopt), token); - continue; - } - - // alpha texture - if ((0 == strncmp(token, "map_d", 5)) && IS_SPACE(token[5])) { - token += 6; - material.alpha_texname = token; - ParseTextureNameAndOption(&(material.alpha_texname), - &(material.alpha_texopt), token); - continue; - } - - // displacement texture - if (((0 == strncmp(token, "map_disp", 8)) || - (0 == 
strncmp(token, "map_Disp", 8))) && - IS_SPACE(token[8])) { - token += 9; - ParseTextureNameAndOption(&(material.displacement_texname), - &(material.displacement_texopt), token); - continue; - } - - // displacement texture - if ((0 == strncmp(token, "disp", 4)) && IS_SPACE(token[4])) { - token += 5; - ParseTextureNameAndOption(&(material.displacement_texname), - &(material.displacement_texopt), token); - continue; - } - - // reflection map - if ((0 == strncmp(token, "refl", 4)) && IS_SPACE(token[4])) { - token += 5; - ParseTextureNameAndOption(&(material.reflection_texname), - &(material.reflection_texopt), token); - continue; - } - - // PBR: roughness texture - if ((0 == strncmp(token, "map_Pr", 6)) && IS_SPACE(token[6])) { - token += 7; - ParseTextureNameAndOption(&(material.roughness_texname), - &(material.roughness_texopt), token); - continue; - } - - // PBR: metallic texture - if ((0 == strncmp(token, "map_Pm", 6)) && IS_SPACE(token[6])) { - token += 7; - ParseTextureNameAndOption(&(material.metallic_texname), - &(material.metallic_texopt), token); - continue; - } - - // PBR: sheen texture - if ((0 == strncmp(token, "map_Ps", 6)) && IS_SPACE(token[6])) { - token += 7; - ParseTextureNameAndOption(&(material.sheen_texname), - &(material.sheen_texopt), token); - continue; - } - - // PBR: emissive texture - if ((0 == strncmp(token, "map_Ke", 6)) && IS_SPACE(token[6])) { - token += 7; - ParseTextureNameAndOption(&(material.emissive_texname), - &(material.emissive_texopt), token); - continue; - } - - // PBR: normal map texture - if ((0 == strncmp(token, "norm", 4)) && IS_SPACE(token[4])) { - token += 5; - ParseTextureNameAndOption(&(material.normal_texname), - &(material.normal_texopt), token); - continue; - } - - // unknown parameter - const char *_space = strchr(token, ' '); - if (!_space) { - _space = strchr(token, '\t'); - } - if (_space) { - std::ptrdiff_t len = _space - token; - std::string key(token, static_cast(len)); - std::string value = _space + 1; - 
material.unknown_parameter.insert( - std::pair(key, value)); - } - } - // flush last material. - material_map->insert(std::pair( - material.name, static_cast(materials->size()))); - materials->push_back(material); - - if (warning) { - (*warning) = warn_ss.str(); - } -} - -bool MaterialFileReader::operator()(const std::string &matId, - std::vector *materials, - std::map *matMap, - std::string *warn, std::string *err) { - if (!m_mtlBaseDir.empty()) { -#ifdef _WIN32 - char sep = ';'; -#else - char sep = ':'; -#endif - - // https://stackoverflow.com/questions/5167625/splitting-a-c-stdstring-using-tokens-e-g - std::vector paths; - std::istringstream f(m_mtlBaseDir); - - std::string s; - while (getline(f, s, sep)) { - paths.push_back(s); - } - - for (size_t i = 0; i < paths.size(); i++) { - std::string filepath = JoinPath(paths[i], matId); - - std::ifstream matIStream(filepath.c_str()); - if (matIStream) { - LoadMtl(matMap, materials, &matIStream, warn, err); - - return true; - } - } - - std::stringstream ss; - ss << "Material file [ " << matId - << " ] not found in a path : " << m_mtlBaseDir << "\n"; - if (warn) { - (*warn) += ss.str(); - } - return false; - - } else { - std::string filepath = matId; - std::ifstream matIStream(filepath.c_str()); - if (matIStream) { - LoadMtl(matMap, materials, &matIStream, warn, err); - - return true; - } - - std::stringstream ss; - ss << "Material file [ " << filepath - << " ] not found in a path : " << m_mtlBaseDir << "\n"; - if (warn) { - (*warn) += ss.str(); - } - - return false; - } -} - -bool MaterialStreamReader::operator()(const std::string &matId, - std::vector *materials, - std::map *matMap, - std::string *warn, std::string *err) { - (void)err; - (void)matId; - if (!m_inStream) { - std::stringstream ss; - ss << "Material stream in error state. 
\n"; - if (warn) { - (*warn) += ss.str(); - } - return false; - } - - LoadMtl(matMap, materials, &m_inStream, warn, err); - - return true; -} - -bool LoadObj(attrib_t *attrib, std::vector *shapes, - std::vector *materials, std::string *warn, - std::string *err, const char *filename, const char *mtl_basedir, - bool triangulate, bool default_vcols_fallback) { - attrib->vertices.clear(); - attrib->normals.clear(); - attrib->texcoords.clear(); - attrib->colors.clear(); - shapes->clear(); - - std::stringstream errss; - - std::ifstream ifs(filename); - if (!ifs) { - errss << "Cannot open file [" << filename << "]\n"; - if (err) { - (*err) = errss.str(); - } - return false; - } - - std::string baseDir = mtl_basedir ? mtl_basedir : ""; - if (!baseDir.empty()) { -#ifndef _WIN32 - const char dirsep = '/'; -#else - const char dirsep = '\\'; -#endif - if (baseDir[baseDir.length() - 1] != dirsep) baseDir += dirsep; - } - MaterialFileReader matFileReader(baseDir); - - return LoadObj(attrib, shapes, materials, warn, err, &ifs, &matFileReader, - triangulate, default_vcols_fallback); -} - -bool LoadObj(attrib_t *attrib, std::vector *shapes, - std::vector *materials, std::string *warn, - std::string *err, std::istream *inStream, - MaterialReader *readMatFn /*= NULL*/, bool triangulate, - bool default_vcols_fallback) { - std::stringstream errss; - - std::vector v; - std::vector vn; - std::vector vt; - std::vector vc; - std::vector vw; - std::vector tags; - PrimGroup prim_group; - std::string name; - - // material - std::set material_filenames; - std::map material_map; - int material = -1; - - // smoothing group id - unsigned int current_smoothing_id = - 0; // Initial value. 0 means no smoothing. 
- - int greatest_v_idx = -1; - int greatest_vn_idx = -1; - int greatest_vt_idx = -1; - - shape_t shape; - - bool found_all_colors = true; - - size_t line_num = 0; - std::string linebuf; - while (inStream->peek() != -1) { - safeGetline(*inStream, linebuf); - - line_num++; - - // Trim newline '\r\n' or '\n' - if (linebuf.size() > 0) { - if (linebuf[linebuf.size() - 1] == '\n') - linebuf.erase(linebuf.size() - 1); - } - if (linebuf.size() > 0) { - if (linebuf[linebuf.size() - 1] == '\r') - linebuf.erase(linebuf.size() - 1); - } - - // Skip if empty line. - if (linebuf.empty()) { - continue; - } - - // Skip leading space. - const char *token = linebuf.c_str(); - token += strspn(token, " \t"); - - assert(token); - if (token[0] == '\0') continue; // empty line - - if (token[0] == '#') continue; // comment line - - // vertex - if (token[0] == 'v' && IS_SPACE((token[1]))) { - token += 2; - real_t x, y, z; - real_t r, g, b; - - found_all_colors &= parseVertexWithColor(&x, &y, &z, &r, &g, &b, &token); - - v.push_back(x); - v.push_back(y); - v.push_back(z); - - if (found_all_colors || default_vcols_fallback) { - vc.push_back(r); - vc.push_back(g); - vc.push_back(b); - } - - continue; - } - - // normal - if (token[0] == 'v' && token[1] == 'n' && IS_SPACE((token[2]))) { - token += 3; - real_t x, y, z; - parseReal3(&x, &y, &z, &token); - vn.push_back(x); - vn.push_back(y); - vn.push_back(z); - continue; - } - - // texcoord - if (token[0] == 'v' && token[1] == 't' && IS_SPACE((token[2]))) { - token += 3; - real_t x, y; - parseReal2(&x, &y, &token); - vt.push_back(x); - vt.push_back(y); - continue; - } - - // skin weight. tinyobj extension - if (token[0] == 'v' && token[1] == 'w' && IS_SPACE((token[2]))) { - token += 3; - - // vw ... 
- // example: - // vw 0 0 0.25 1 0.25 2 0.5 - - // TODO(syoyo): Add syntax check - int vid = 0; - vid = parseInt(&token); - - skin_weight_t sw; - - sw.vertex_id = vid; - - while (!IS_NEW_LINE(token[0])) { - real_t j, w; - // joint_id should not be negative, weight may be negative - // TODO(syoyo): # of elements check - parseReal2(&j, &w, &token, -1.0); - - if (j < static_cast(0)) { - if (err) { - std::stringstream ss; - ss << "Failed parse `vw' line. joint_id is negative. " - "line " - << line_num << ".)\n"; - (*err) += ss.str(); - } - return false; - } - - joint_and_weight_t jw; - - jw.joint_id = int(j); - jw.weight = w; - - sw.weightValues.push_back(jw); - - size_t n = strspn(token, " \t\r"); - token += n; - } - - vw.push_back(sw); - } - - warning_context context; - context.warn = warn; - context.line_number = line_num; - - // line - if (token[0] == 'l' && IS_SPACE((token[1]))) { - token += 2; - - __line_t line; - - while (!IS_NEW_LINE(token[0])) { - vertex_index_t vi; - if (!parseTriple(&token, static_cast(v.size() / 3), - static_cast(vn.size() / 3), - static_cast(vt.size() / 2), &vi, context)) { - if (err) { - (*err) += "Failed to parse `l' line (e.g. a zero value for vertex index. Line " + - toString(line_num) + ").\n"; - } - return false; - } - - line.vertex_indices.push_back(vi); - - size_t n = strspn(token, " \t\r"); - token += n; - } - - prim_group.lineGroup.push_back(line); - - continue; - } - - // points - if (token[0] == 'p' && IS_SPACE((token[1]))) { - token += 2; - - __points_t pts; - - while (!IS_NEW_LINE(token[0])) { - vertex_index_t vi; - if (!parseTriple(&token, static_cast(v.size() / 3), - static_cast(vn.size() / 3), - static_cast(vt.size() / 2), &vi, context)) { - if (err) { - (*err) += "Failed to parse `p' line (e.g. a zero value for vertex index. 
Line " + - toString(line_num) + ").\n"; - } - return false; - } - - pts.vertex_indices.push_back(vi); - - size_t n = strspn(token, " \t\r"); - token += n; - } - - prim_group.pointsGroup.push_back(pts); - - continue; - } - - // face - if (token[0] == 'f' && IS_SPACE((token[1]))) { - token += 2; - token += strspn(token, " \t"); - - face_t face; - - face.smoothing_group_id = current_smoothing_id; - face.vertex_indices.reserve(3); - - while (!IS_NEW_LINE(token[0])) { - vertex_index_t vi; - if (!parseTriple(&token, static_cast(v.size() / 3), - static_cast(vn.size() / 3), - static_cast(vt.size() / 2), &vi, context)) { - if (err) { - (*err) += "Failed to parse `f' line (e.g. a zero value for vertex index. Line " + - toString(line_num) + ").\n"; - } - return false; - } - - greatest_v_idx = greatest_v_idx > vi.v_idx ? greatest_v_idx : vi.v_idx; - greatest_vn_idx = - greatest_vn_idx > vi.vn_idx ? greatest_vn_idx : vi.vn_idx; - greatest_vt_idx = - greatest_vt_idx > vi.vt_idx ? greatest_vt_idx : vi.vt_idx; - - face.vertex_indices.push_back(vi); - size_t n = strspn(token, " \t\r"); - token += n; - } - - // replace with emplace_back + std::move on C++11 - prim_group.faceGroup.push_back(face); - - continue; - } - - // use mtl - if ((0 == strncmp(token, "usemtl", 6))) { - token += 6; - std::string namebuf = parseString(&token); - - int newMaterialId = -1; - std::map::const_iterator it = - material_map.find(namebuf); - if (it != material_map.end()) { - newMaterialId = it->second; - } else { - // { error!! material not found } - if (warn) { - (*warn) += "material [ '" + namebuf + "' ] not found in .mtl\n"; - } - } - - if (newMaterialId != material) { - // Create per-face material. Thus we don't add `shape` to `shapes` at - // this time. - // just clear `faceGroup` after `exportGroupsToShape()` call. 
- exportGroupsToShape(&shape, prim_group, tags, material, name, - triangulate, v, warn); - prim_group.faceGroup.clear(); - material = newMaterialId; - } - - continue; - } - - // load mtl - if ((0 == strncmp(token, "mtllib", 6)) && IS_SPACE((token[6]))) { - if (readMatFn) { - token += 7; - - std::vector filenames; - SplitString(std::string(token), ' ', '\\', filenames); - - if (filenames.empty()) { - if (warn) { - std::stringstream ss; - ss << "Looks like empty filename for mtllib. Use default " - "material (line " - << line_num << ".)\n"; - - (*warn) += ss.str(); - } - } else { - bool found = false; - for (size_t s = 0; s < filenames.size(); s++) { - if (material_filenames.count(filenames[s]) > 0) { - found = true; - continue; - } - - std::string warn_mtl; - std::string err_mtl; - bool ok = (*readMatFn)(filenames[s].c_str(), materials, - &material_map, &warn_mtl, &err_mtl); - if (warn && (!warn_mtl.empty())) { - (*warn) += warn_mtl; - } - - if (err && (!err_mtl.empty())) { - (*err) += err_mtl; - } - - if (ok) { - found = true; - material_filenames.insert(filenames[s]); - break; - } - } - - if (!found) { - if (warn) { - (*warn) += - "Failed to load material file(s). Use default " - "material.\n"; - } - } - } - } - - continue; - } - - // group name - if (token[0] == 'g' && IS_SPACE((token[1]))) { - // flush previous face group. - bool ret = exportGroupsToShape(&shape, prim_group, tags, material, name, - triangulate, v, warn); - (void)ret; // return value not used. - - if (shape.mesh.indices.size() > 0) { - shapes->push_back(shape); - } - - shape = shape_t(); - - // material = -1; - prim_group.clear(); - - std::vector names; - - while (!IS_NEW_LINE(token[0])) { - std::string str = parseString(&token); - names.push_back(str); - token += strspn(token, " \t\r"); // skip tag - } - - // names[0] must be 'g' - - if (names.size() < 2) { - // 'g' with empty names - if (warn) { - std::stringstream ss; - ss << "Empty group name. 
line: " << line_num << "\n"; - (*warn) += ss.str(); - name = ""; - } - } else { - std::stringstream ss; - ss << names[1]; - - // tinyobjloader does not support multiple groups for a primitive. - // Currently we concatinate multiple group names with a space to get - // single group name. - - for (size_t i = 2; i < names.size(); i++) { - ss << " " << names[i]; - } - - name = ss.str(); - } - - continue; - } - - // object name - if (token[0] == 'o' && IS_SPACE((token[1]))) { - // flush previous face group. - bool ret = exportGroupsToShape(&shape, prim_group, tags, material, name, - triangulate, v, warn); - (void)ret; // return value not used. - - if (shape.mesh.indices.size() > 0 || shape.lines.indices.size() > 0 || - shape.points.indices.size() > 0) { - shapes->push_back(shape); - } - - // material = -1; - prim_group.clear(); - shape = shape_t(); - - // @todo { multiple object name? } - token += 2; - std::stringstream ss; - ss << token; - name = ss.str(); - - continue; - } - - if (token[0] == 't' && IS_SPACE(token[1])) { - const int max_tag_nums = 8192; // FIXME(syoyo): Parameterize. 
- tag_t tag; - - token += 2; - - tag.name = parseString(&token); - - tag_sizes ts = parseTagTriple(&token); - - if (ts.num_ints < 0) { - ts.num_ints = 0; - } - if (ts.num_ints > max_tag_nums) { - ts.num_ints = max_tag_nums; - } - - if (ts.num_reals < 0) { - ts.num_reals = 0; - } - if (ts.num_reals > max_tag_nums) { - ts.num_reals = max_tag_nums; - } - - if (ts.num_strings < 0) { - ts.num_strings = 0; - } - if (ts.num_strings > max_tag_nums) { - ts.num_strings = max_tag_nums; - } - - tag.intValues.resize(static_cast(ts.num_ints)); - - for (size_t i = 0; i < static_cast(ts.num_ints); ++i) { - tag.intValues[i] = parseInt(&token); - } - - tag.floatValues.resize(static_cast(ts.num_reals)); - for (size_t i = 0; i < static_cast(ts.num_reals); ++i) { - tag.floatValues[i] = parseReal(&token); - } - - tag.stringValues.resize(static_cast(ts.num_strings)); - for (size_t i = 0; i < static_cast(ts.num_strings); ++i) { - tag.stringValues[i] = parseString(&token); - } - - tags.push_back(tag); - - continue; - } - - if (token[0] == 's' && IS_SPACE(token[1])) { - // smoothing group id - token += 2; - - // skip space. - token += strspn(token, " \t"); // skip space - - if (token[0] == '\0') { - continue; - } - - if (token[0] == '\r' || token[1] == '\n') { - continue; - } - - if (strlen(token) >= 3 && token[0] == 'o' && token[1] == 'f' && - token[2] == 'f') { - current_smoothing_id = 0; - } else { - // assume number - int smGroupId = parseInt(&token); - if (smGroupId < 0) { - // parse error. force set to 0. - // FIXME(syoyo): Report warning. - current_smoothing_id = 0; - } else { - current_smoothing_id = static_cast(smGroupId); - } - } - - continue; - } // smoothing group id - - // Ignore unknown command. - } - - // not all vertices have colors, no default colors desired? 
-> clear colors - if (!found_all_colors && !default_vcols_fallback) { - vc.clear(); - } - - if (greatest_v_idx >= static_cast(v.size() / 3)) { - if (warn) { - std::stringstream ss; - ss << "Vertex indices out of bounds (line " << line_num << ".)\n\n"; - (*warn) += ss.str(); - } - } - if (greatest_vn_idx >= static_cast(vn.size() / 3)) { - if (warn) { - std::stringstream ss; - ss << "Vertex normal indices out of bounds (line " << line_num << ".)\n\n"; - (*warn) += ss.str(); - } - } - if (greatest_vt_idx >= static_cast(vt.size() / 2)) { - if (warn) { - std::stringstream ss; - ss << "Vertex texcoord indices out of bounds (line " << line_num << ".)\n\n"; - (*warn) += ss.str(); - } - } - - bool ret = exportGroupsToShape(&shape, prim_group, tags, material, name, - triangulate, v, warn); - // exportGroupsToShape return false when `usemtl` is called in the last - // line. - // we also add `shape` to `shapes` when `shape.mesh` has already some - // faces(indices) - if (ret || shape.mesh.indices - .size()) { // FIXME(syoyo): Support other prims(e.g. 
lines) - shapes->push_back(shape); - } - prim_group.clear(); // for safety - - if (err) { - (*err) += errss.str(); - } - - attrib->vertices.swap(v); - attrib->vertex_weights.swap(v); - attrib->normals.swap(vn); - attrib->texcoords.swap(vt); - attrib->texcoord_ws.swap(vt); - attrib->colors.swap(vc); - attrib->skin_weights.swap(vw); - - return true; -} - -bool LoadObjWithCallback(std::istream &inStream, const callback_t &callback, - void *user_data /*= NULL*/, - MaterialReader *readMatFn /*= NULL*/, - std::string *warn, /* = NULL*/ - std::string *err /*= NULL*/) { - std::stringstream errss; - - // material - std::set material_filenames; - std::map material_map; - int material_id = -1; // -1 = invalid - - std::vector indices; - std::vector materials; - std::vector names; - names.reserve(2); - std::vector names_out; - - std::string linebuf; - while (inStream.peek() != -1) { - safeGetline(inStream, linebuf); - - // Trim newline '\r\n' or '\n' - if (linebuf.size() > 0) { - if (linebuf[linebuf.size() - 1] == '\n') - linebuf.erase(linebuf.size() - 1); - } - if (linebuf.size() > 0) { - if (linebuf[linebuf.size() - 1] == '\r') - linebuf.erase(linebuf.size() - 1); - } - - // Skip if empty line. - if (linebuf.empty()) { - continue; - } - - // Skip leading space. 
- const char *token = linebuf.c_str(); - token += strspn(token, " \t"); - - assert(token); - if (token[0] == '\0') continue; // empty line - - if (token[0] == '#') continue; // comment line - - // vertex - if (token[0] == 'v' && IS_SPACE((token[1]))) { - token += 2; - real_t x, y, z; - real_t r, g, b; - - bool found_color = parseVertexWithColor(&x, &y, &z, &r, &g, &b, &token); - if (callback.vertex_cb) { - callback.vertex_cb(user_data, x, y, z, r); // r=w is optional - } - if (callback.vertex_color_cb) { - callback.vertex_color_cb(user_data, x, y, z, r, g, b, found_color); - } - continue; - } - - // normal - if (token[0] == 'v' && token[1] == 'n' && IS_SPACE((token[2]))) { - token += 3; - real_t x, y, z; - parseReal3(&x, &y, &z, &token); - if (callback.normal_cb) { - callback.normal_cb(user_data, x, y, z); - } - continue; - } - - // texcoord - if (token[0] == 'v' && token[1] == 't' && IS_SPACE((token[2]))) { - token += 3; - real_t x, y, z; // y and z are optional. default = 0.0 - parseReal3(&x, &y, &z, &token); - if (callback.texcoord_cb) { - callback.texcoord_cb(user_data, x, y, z); - } - continue; - } - - // face - if (token[0] == 'f' && IS_SPACE((token[1]))) { - token += 2; - token += strspn(token, " \t"); - - indices.clear(); - while (!IS_NEW_LINE(token[0])) { - vertex_index_t vi = parseRawTriple(&token); - - index_t idx; - idx.vertex_index = vi.v_idx; - idx.normal_index = vi.vn_idx; - idx.texcoord_index = vi.vt_idx; - - indices.push_back(idx); - size_t n = strspn(token, " \t\r"); - token += n; - } - - if (callback.index_cb && indices.size() > 0) { - callback.index_cb(user_data, &indices.at(0), - static_cast(indices.size())); - } - - continue; - } - - // use mtl - if ((0 == strncmp(token, "usemtl", 6)) && IS_SPACE((token[6]))) { - token += 7; - std::stringstream ss; - ss << token; - std::string namebuf = ss.str(); - - int newMaterialId = -1; - std::map::const_iterator it = - material_map.find(namebuf); - if (it != material_map.end()) { - newMaterialId = 
it->second; - } else { - // { warn!! material not found } - if (warn && (!callback.usemtl_cb)) { - (*warn) += "material [ " + namebuf + " ] not found in .mtl\n"; - } - } - - if (newMaterialId != material_id) { - material_id = newMaterialId; - } - - if (callback.usemtl_cb) { - callback.usemtl_cb(user_data, namebuf.c_str(), material_id); - } - - continue; - } - - // load mtl - if ((0 == strncmp(token, "mtllib", 6)) && IS_SPACE((token[6]))) { - if (readMatFn) { - token += 7; - - std::vector filenames; - SplitString(std::string(token), ' ', '\\', filenames); - - if (filenames.empty()) { - if (warn) { - (*warn) += - "Looks like empty filename for mtllib. Use default " - "material. \n"; - } - } else { - bool found = false; - for (size_t s = 0; s < filenames.size(); s++) { - if (material_filenames.count(filenames[s]) > 0) { - found = true; - continue; - } - - std::string warn_mtl; - std::string err_mtl; - bool ok = (*readMatFn)(filenames[s].c_str(), &materials, - &material_map, &warn_mtl, &err_mtl); - - if (warn && (!warn_mtl.empty())) { - (*warn) += warn_mtl; // This should be warn message. - } - - if (err && (!err_mtl.empty())) { - (*err) += err_mtl; - } - - if (ok) { - found = true; - material_filenames.insert(filenames[s]); - break; - } - } - - if (!found) { - if (warn) { - (*warn) += - "Failed to load material file(s). Use default " - "material.\n"; - } - } else { - if (callback.mtllib_cb) { - callback.mtllib_cb(user_data, &materials.at(0), - static_cast(materials.size())); - } - } - } - } - - continue; - } - - // group name - if (token[0] == 'g' && IS_SPACE((token[1]))) { - names.clear(); - - while (!IS_NEW_LINE(token[0])) { - std::string str = parseString(&token); - names.push_back(str); - token += strspn(token, " \t\r"); // skip tag - } - - assert(names.size() > 0); - - if (callback.group_cb) { - if (names.size() > 1) { - // create const char* array. 
- names_out.resize(names.size() - 1); - for (size_t j = 0; j < names_out.size(); j++) { - names_out[j] = names[j + 1].c_str(); - } - callback.group_cb(user_data, &names_out.at(0), - static_cast(names_out.size())); - - } else { - callback.group_cb(user_data, NULL, 0); - } - } - - continue; - } - - // object name - if (token[0] == 'o' && IS_SPACE((token[1]))) { - // @todo { multiple object name? } - token += 2; - - std::stringstream ss; - ss << token; - std::string object_name = ss.str(); - - if (callback.object_cb) { - callback.object_cb(user_data, object_name.c_str()); - } - - continue; - } - -#if 0 // @todo - if (token[0] == 't' && IS_SPACE(token[1])) { - tag_t tag; - - token += 2; - std::stringstream ss; - ss << token; - tag.name = ss.str(); - - token += tag.name.size() + 1; - - tag_sizes ts = parseTagTriple(&token); - - tag.intValues.resize(static_cast(ts.num_ints)); - - for (size_t i = 0; i < static_cast(ts.num_ints); ++i) { - tag.intValues[i] = atoi(token); - token += strcspn(token, "/ \t\r") + 1; - } - - tag.floatValues.resize(static_cast(ts.num_reals)); - for (size_t i = 0; i < static_cast(ts.num_reals); ++i) { - tag.floatValues[i] = parseReal(&token); - token += strcspn(token, "/ \t\r") + 1; - } - - tag.stringValues.resize(static_cast(ts.num_strings)); - for (size_t i = 0; i < static_cast(ts.num_strings); ++i) { - std::stringstream ss; - ss << token; - tag.stringValues[i] = ss.str(); - token += tag.stringValues[i].size() + 1; - } - - tags.push_back(tag); - } -#endif - - // Ignore unknown command. 
- } - - if (err) { - (*err) += errss.str(); - } - - return true; -} - -bool ObjReader::ParseFromFile(const std::string &filename, - const ObjReaderConfig &config) { - std::string mtl_search_path; - - if (config.mtl_search_path.empty()) { - // - // split at last '/'(for unixish system) or '\\'(for windows) to get - // the base directory of .obj file - // - size_t pos = filename.find_last_of("/\\"); - if (pos != std::string::npos) { - mtl_search_path = filename.substr(0, pos); - } - } else { - mtl_search_path = config.mtl_search_path; - } - - valid_ = LoadObj(&attrib_, &shapes_, &materials_, &warning_, &error_, - filename.c_str(), mtl_search_path.c_str(), - config.triangulate, config.vertex_color); - - return valid_; -} - -bool ObjReader::ParseFromString(const std::string &obj_text, - const std::string &mtl_text, - const ObjReaderConfig &config) { - std::stringbuf obj_buf(obj_text); - std::stringbuf mtl_buf(mtl_text); - - std::istream obj_ifs(&obj_buf); - std::istream mtl_ifs(&mtl_buf); - - MaterialStreamReader mtl_ss(mtl_ifs); - - valid_ = LoadObj(&attrib_, &shapes_, &materials_, &warning_, &error_, - &obj_ifs, &mtl_ss, config.triangulate, config.vertex_color); - - return valid_; -} - -#ifdef __clang__ -#pragma clang diagnostic pop -#endif -} // namespace tinyobj - -#endif