From 2af2fb8e0cc7073b96be074f7585380138757332 Mon Sep 17 00:00:00 2001 From: Marshall Lochbaum Date: Sun, 7 Jan 2024 19:54:13 -0500 Subject: Use ⍷⊸⊐ for Classify (⊐) on long 1-byte arguments if SIMD bit-table is available MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/builtins/selfsearch.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/src/builtins/selfsearch.c b/src/builtins/selfsearch.c index 0647bc2e..5da2e233 100644 --- a/src/builtins/selfsearch.c +++ b/src/builtins/selfsearch.c @@ -12,6 +12,8 @@ // Branchless, not vectorized (+´∧` structure for ⊐) // COULD use direct all-pairs filter, not ∊⊸/, for short ⍷ // Full-size table lookups for 1- and 2-byte 𝕩 +// 1-byte ∊ and ⍷, and ⊐ via ⍷⊸⊐: SSSE3/AVX2 table +// TRIED dedicated table constructor for ⊐, no significant speedup // 2-byte table can be "sparse" initialized with an extra pass over 𝕩 // 4-byte ⊐ can use a small-range lookup table // COULD add small-range 4-byte tables for ∊ and ⊒ @@ -39,6 +41,7 @@ extern B slash_c2(B, B, B); extern B ud_c1(B, B); extern B sub_c2(B, B, B); extern B mul_c2(B, B, B); +extern B indexOf_c2(B, B, B); extern B scan_add_bool(B x, u64 ia); extern B scan_max_num(B x, u8 xe, u64 ia); @@ -353,6 +356,7 @@ B count_c1(B t, B x) { static B reduceI32WidthBelow(B r, usz after) { return after<=2? taga(cpyBitArr(r)) : after<=I8_MAX+1? taga(cpyI8Arr(r)) : after<=I16_MAX+1? taga(cpyI16Arr(r)) : r; } +B find_c1(B, B); B indexOf_c1(B t, B x) { if (isAtm(x) || RNK(x)==0) thrM("⊐: 𝕩 cannot have rank 0"); @@ -398,7 +402,17 @@ B indexOf_c1(B t, B x) { DOTAB(u##T) \ decG(x); TFREE(tab); \ return reduceI32WidthBelow(r, u) - if (lw==3) { if (n<12) { BRUTE(8); } else { LOOKUP(8); } } + if (lw==3) { + if (n<12) { BRUTE(8); } + // If bit-table Mark Firsts is available + #if SINGELI && defined(__SSSE3__) + else if (n > 1<<12) { + B u = C1(find, incG(x)); + return C2(indexOf, u, x); + } + #endif + else { LOOKUP(8); } + } if (lw==4) { if (n<12) { BRUTE(16); } else { LOOKUP(16); } } #undef LOOKUP -- cgit v1.2.3-70-g09d2 From 7511420077500bb5fb6a013b630fb8ecaf92a165 Mon Sep 17 00:00:00 2001 From: Marshall Lochbaum Date: Sun, 7 Jan 2024 21:27:12 -0500 Subject: Fix first element of 1-byte Deduplicate result in some cases with 256 uniques --- src/singeli/src/search.singeli | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/singeli/src/search.singeli b/src/singeli/src/search.singeli index 7eceae3c..0e4d0287 100644 --- a/src/singeli/src/search.singeli +++ b/src/singeli/src/search.singeli @@ -229,6 +229,8 @@ def do_bittab{x0:*void, n:u64, tab:*void, u:u8, t, mode, r0} = { } setlabel{done} } + # When u==0 isn't immediately tested, first result can be overwritten + if (rval) store{*u8~~r0, 0, load{*u8~~x0}} u } -- cgit v1.2.3-70-g09d2