summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authordzaima <dzaimagit@gmail.com>2024-01-09 14:06:29 +0200
committerGitHub <noreply@github.com>2024-01-09 14:06:29 +0200
commit8c18fddd5483c5fba658f24674b004e4444bf72d (patch)
tree461a7a55ca5f90980fe62424a7a32a30c98580e5
parent20185b57c5b2b02356306444b6510b5e6e8744fd (diff)
parent7511420077500bb5fb6a013b630fb8ecaf92a165 (diff)
Merge pull request #103 from mlochbaum/classify-bytev0.6.0
Faster long 1-byte Classify
-rw-r--r--src/builtins/selfsearch.c16
-rw-r--r--src/singeli/src/search.singeli2
2 files changed, 17 insertions, 1 deletions
diff --git a/src/builtins/selfsearch.c b/src/builtins/selfsearch.c
index 0647bc2e..5da2e233 100644
--- a/src/builtins/selfsearch.c
+++ b/src/builtins/selfsearch.c
@@ -12,6 +12,8 @@
// Branchless, not vectorized (+´∧` structure for ⊐)
// COULD use direct all-pairs filter, not ∊⊸/, for short ⍷
// Full-size table lookups for 1- and 2-byte 𝕩
+// 1-byte ∊ and ⍷, and ⊐ via ⍷⊸⊐: SSSE3/AVX2 table
+// TRIED dedicated table constructor for ⊐, no significant speedup
// 2-byte table can be "sparse" initialized with an extra pass over 𝕩
// 4-byte ⊐ can use a small-range lookup table
// COULD add small-range 4-byte tables for ∊ and ⊒
@@ -39,6 +41,7 @@ extern B slash_c2(B, B, B);
extern B ud_c1(B, B);
extern B sub_c2(B, B, B);
extern B mul_c2(B, B, B);
+extern B indexOf_c2(B, B, B);
extern B scan_add_bool(B x, u64 ia);
extern B scan_max_num(B x, u8 xe, u64 ia);
@@ -353,6 +356,7 @@ B count_c1(B t, B x) {
static B reduceI32WidthBelow(B r, usz after) {
return after<=2? taga(cpyBitArr(r)) : after<=I8_MAX+1? taga(cpyI8Arr(r)) : after<=I16_MAX+1? taga(cpyI16Arr(r)) : r;
}
+B find_c1(B, B);
B indexOf_c1(B t, B x) {
if (isAtm(x) || RNK(x)==0) thrM("⊐: 𝕩 cannot have rank 0");
@@ -398,7 +402,17 @@ B indexOf_c1(B t, B x) {
DOTAB(u##T) \
decG(x); TFREE(tab); \
return reduceI32WidthBelow(r, u)
- if (lw==3) { if (n<12) { BRUTE(8); } else { LOOKUP(8); } }
+ if (lw==3) {
+ if (n<12) { BRUTE(8); }
+ // If bit-table Mark Firsts is available
+ #if SINGELI && defined(__SSSE3__)
+ else if (n > 1<<12) {
+ B u = C1(find, incG(x));
+ return C2(indexOf, u, x);
+ }
+ #endif
+ else { LOOKUP(8); }
+ }
if (lw==4) { if (n<12) { BRUTE(16); } else { LOOKUP(16); } }
#undef LOOKUP
diff --git a/src/singeli/src/search.singeli b/src/singeli/src/search.singeli
index 7eceae3c..0e4d0287 100644
--- a/src/singeli/src/search.singeli
+++ b/src/singeli/src/search.singeli
@@ -229,6 +229,8 @@ def do_bittab{x0:*void, n:u64, tab:*void, u:u8, t, mode, r0} = {
}
setlabel{done}
}
+ # When u==0 isn't immediately tested, first result can be overwritten
+ if (rval) store{*u8~~r0, 0, load{*u8~~x0}}
u
}