7 files changed, 731 insertions, 927 deletions
diff --git a/crates/store/src/fts/bloom.rs b/crates/store/src/fts/bloom.rs
deleted file mode 100644
index 6145a637..00000000
--- a/crates/store/src/fts/bloom.rs
+++ /dev/null
@@ -1,257 +0,0 @@
-/*
- * Copyright (c) 2023 Stalwart Labs Ltd.
- *
- * This file is part of the Stalwart Mail Server.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License as
- * published by the Free Software Foundation, either version 3 of
- * the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Affero General Public License for more details.
- * in the LICENSE file at the top-level directory of this distribution.
- * You should have received a copy of the GNU Affero General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- * You can be released from the requirements of the AGPLv3 license by
- * purchasing a commercial license. Please contact licensing@stalw.art
- * for more details.
-*/
-
-use std::{
-    borrow::Cow,
-    f64::consts::LN_2,
-    hash::{Hash, Hasher},
-};
-
-use nlp::{language::stemmer::StemmedToken, tokenizers::Token};
-use roaring::RoaringBitmap;
-use utils::codec::leb128::{Leb128Reader, Leb128Vec};
-
-use crate::{Deserialize, Error, Serialize};
-
-pub struct BloomFilter {
-    m: u64,
-    b: RoaringBitmap,
-}
-
-#[derive(Debug)]
-pub struct BloomHash {
-    pub h: [u64; 7],
-}
-
-#[derive(Debug)]
-pub struct BloomHashGroup {
-    pub h1: BloomHash,
-    pub h2: Option<BloomHash>,
-}
-
-const AHASHER: ahash::RandomState = ahash::RandomState::with_seeds(
-    0xaf1f2242106c64b3,
-    0x60ca4cfb4b3ed0ce,
-    0xc7dbc0bb615e82b3,
-    0x520ad065378daf88,
-);
-lazy_static::lazy_static! {
-    static ref SIPHASHER: siphasher::sip::SipHasher13 =
-        siphasher::sip::SipHasher13::new_with_keys(0x56205cbdba8f02a6, 0xbd0dbc4bb06d687b);
-}
-
-const P: f64 = 0.01;
-
-impl BloomFilter {
-    pub fn new(items: usize) -> Self {
-        Self {
-            m: if items > 0 {
-                std::cmp::max(Self::estimate_m(items, P), 10240)
-            } else {
-                0
-            },
-            b: RoaringBitmap::new(),
-        }
-    }
-
-    fn from_params(m: u64, b: RoaringBitmap) -> Self {
-        Self { m, b }
-    }
-
-    fn estimate_m(n: usize, p: f64) -> u64 {
-        (((n as f64) * f64::ln(p) / (-8.0 * LN_2.powi(2))).ceil() as u64) * 8
-    }
-
-    #[allow(dead_code)]
-    fn estimate_k(m: u64, n: usize) -> u32 {
-        std::cmp::max(((m as f64) / (n as f64) * f64::ln(2.0f64)).ceil() as u32, 1)
-    }
-
-    pub fn insert(&mut self, hash: &BloomHash) {
-        self.b.insert((hash.h[0] % self.m) as u32);
-        self.b.insert((hash.h[1] % self.m) as u32);
-        self.b.insert((hash.h[2] % self.m) as u32);
-        self.b.insert((hash.h[3] % self.m) as u32);
-        self.b.insert((hash.h[4] % self.m) as u32);
-        self.b.insert((hash.h[5] % self.m) as u32);
-        self.b.insert((hash.h[6] % self.m) as u32);
-    }
-
-    pub fn contains(&self, hash: &BloomHash) -> bool {
-        self.b.contains((hash.h[0] % self.m) as u32)
-            && self.b.contains((hash.h[1] % self.m) as u32)
-            && self.b.contains((hash.h[2] % self.m) as u32)
-            && self.b.contains((hash.h[3] % self.m) as u32)
-            && self.b.contains((hash.h[4] % self.m) as u32)
-            && self.b.contains((hash.h[5] % self.m) as u32)
-            && self.b.contains((hash.h[6] % self.m) as u32)
-    }
-
-    pub fn is_subset(&self, other: &Self) -> bool {
-        self.b.is_subset(&other.b)
-    }
-
-    pub fn is_empty(&self) -> bool {
-        self.m == 0 || self.b.is_empty()
-    }
-}
-
-pub trait BloomHasher {
-    fn hash<T: Hash + AsRef<[u8]> + ?Sized>(item: &T) -> Self;
-}
-
-impl BloomHash {
-    pub fn hash<T: Hash + AsRef<[u8]> + ?Sized>(item: &T) -> Self {
-        let h1 = xxhash_rust::xxh3::xxh3_64(item.as_ref());
-        let h2 = farmhash::hash64(item.as_ref());
-        let h3 = AHASHER.hash_one(item);
-        let mut sh = *SIPHASHER;
-        sh.write(item.as_ref());
-        let h4 = sh.finish();
-
-        Self {
-            h: [h1, h2, h3, h4, h1 ^ h2, h2 ^ h3, h3 ^ h4],
-        }
-    }
-}
-
-pub fn hash_token(item: &str) -> Vec<u8> {
-    let h1 = xxhash_rust::xxh3::xxh3_64(item.as_ref()).to_le_bytes();
-    let h2 = farmhash::hash64(item.as_ref()).to_le_bytes();
-    let h3 = AHASHER.hash_one(item).to_le_bytes();
-    let mut sh = *SIPHASHER;
-    sh.write(item.as_ref());
-    let h4 = sh.finish().to_le_bytes();
-
-    match item.len() {
-        0..=8 => {
-            let mut hash = Vec::with_capacity(6);
-            hash.extend_from_slice(&h1[..2]);
-            hash.extend_from_slice(&h2[..2]);
-            hash.push(h3[0]);
-            hash.push(h4[0]);
-            hash
-        }
-        9..=16 => {
-            let mut hash = Vec::with_capacity(8);
-            hash.extend_from_slice(&h1[..2]);
-            hash.extend_from_slice(&h2[..2]);
-            hash.extend_from_slice(&h3[..2]);
-            hash.extend_from_slice(&h4[..2]);
-            hash
-        }
-        17..=32 => {
-            let mut hash = Vec::with_capacity(12);
-            hash.extend_from_slice(&h1[..3]);
-            hash.extend_from_slice(&h2[..3]);
-            hash.extend_from_slice(&h3[..3]);
-            hash.extend_from_slice(&h4[..3]);
-            hash
-        }
-        _ => {
-            let mut hash = Vec::with_capacity(16);
-            hash.extend_from_slice(&h1[..4]);
-            hash.extend_from_slice(&h2[..4]);
-            hash.extend_from_slice(&h3[..4]);
-            hash.extend_from_slice(&h4[..4]);
-            hash
-        }
-    }
-}
-
-impl From<&str> for BloomHash {
-    fn from(s: &str) -> Self {
-        Self::hash(&s)
-    }
-}
-
-impl From<String> for BloomHash {
-    fn from(s: String) -> Self {
-        Self::hash(&s)
-    }
-}
-
-impl From<&String> for BloomHash {
-    fn from(s: &String) -> Self {
-        Self::hash(&s)
-    }
-}
-
-impl From<Cow<'_, str>> for BloomHash {
-    fn from(s: Cow<'_, str>) -> Self {
-        Self::hash(s.as_ref())
-    }
-}
-
-impl From<Token<Cow<'_, str>>> for BloomHashGroup {
-    fn from(t: Token<Cow<'_, str>>) -> Self {
-        Self {
-            h1: BloomHash::hash(t.word.as_ref()),
-            h2: None,
-        }
-    }
-}
-
-impl From<StemmedToken<'_>> for BloomHashGroup {
-    fn from(t: StemmedToken<'_>) -> Self {
-        Self {
-            h1: BloomHash::hash(t.word.as_ref()),
-            h2: t.stemmed_word.map(|w| BloomHash::hash(&format!("{w}_"))),
-        }
-    }
-}
-
-impl From<Cow<'_, str>> for BloomHashGroup {
-    fn from(t: Cow<'_, str>) -> Self {
-        Self {
-            h1: BloomHash::hash(t.as_ref()),
-            h2: None,
-        }
-    }
-}
-
-impl Serialize for BloomFilter {
-    fn serialize(self) -> Vec<u8> {
-        let mut buf = Vec::with_capacity(U64_LEN + self.b.serialized_size());
-        buf.push_leb128(self.m);
-        let _ = self.b.serialize_into(&mut buf);
-        buf
-    }
-}
-
-impl Deserialize for BloomFilter {
-    fn deserialize(bytes: &[u8]) -> crate::Result<Self> {
-        let (m, pos) = bytes.read_leb128().ok_or_else(|| {
-            Error::InternalError(
-                "Failed to read 'm' value while deserializing bloom filter.".to_string(),
-            )
-        })?;
-        RoaringBitmap::deserialize_unchecked_from(bytes.get(pos..).ok_or_else(|| {
-            Error::InternalError(
-                "Failed to read bitmap while deserializing bloom filter.".to_string(),
-            )
-        })?)
-        .map_err(|err| Error::InternalError(format!("Failed to deserialize bloom filter: {err}.")))
-        .map(|b| Self::from_params(m, b))
-    }
-}
diff --git a/crates/store/src/fts/builder.rs b/crates/store/src/fts/builder.rs
deleted file mode 100644
index f4a8422d..00000000
--- a/crates/store/src/fts/builder.rs
+++ /dev/null
@@ -1,250 +0,0 @@
-/*
- * Copyright (c) 2023 Stalwart Labs Ltd.
- *
- * This file is part of the Stalwart Mail Server.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License as
- * published by the Free Software Foundation, either version 3 of
- * the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Affero General Public License for more details.
- * in the LICENSE file at the top-level directory of this distribution.
- * You should have received a copy of the GNU Affero General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- * You can be released from the requirements of the AGPLv3 license by
- * purchasing a commercial license. Please contact licensing@stalw.art
- * for more details.
-*/
-
-use std::{borrow::Cow, collections::HashSet, fmt::Display};
-
-use ahash::AHashSet;
-use nlp::{
-    language::{
-        detect::{LanguageDetector, MIN_LANGUAGE_SCORE},
-        stemmer::Stemmer,
-        Language,
-    },
-    tokenizers::{space::SpaceTokenizer, Token},
-};
-use utils::map::vec_map::VecMap;
-
-use crate::{
-    query::RawValue,
-    write::{BatchBuilder, IntoOperations, Operation, ValueClass},
-    Serialize, HASH_EXACT, HASH_STEMMED,
-};
-
-use super::term_index::{TermIndexBuilder, TokenIndex};
-
-pub const MAX_TOKEN_LENGTH: usize = (u8::MAX >> 2) as usize;
-pub const MAX_TOKEN_MASK: usize = MAX_TOKEN_LENGTH - 1;
-
-struct Text<'x, T: Into<u8> + Display> {
-    field: T,
-    text: Cow<'x, str>,
-    language: Type,
-}
-
-enum Type {
-    Stem(Language),
-    Tokenize,
-    Static,
-}
-
-pub struct FtsIndexBuilder<'x, T: Into<u8> + Display> {
-    parts: Vec<Text<'x, T>>,
-    default_language: Language,
-}
-
-impl<'x, T: Into<u8> + Display> FtsIndexBuilder<'x, T> {
-    pub fn with_default_language(default_language: Language) -> FtsIndexBuilder<'x, T> {
-        FtsIndexBuilder {
-            parts: vec![],
-            default_language,
-        }
-    }
-
-    pub fn index(&mut self, field: T, text: impl Into<Cow<'x, str>>, language: Language) {
-        self.parts.push(Text {
-            field,
-            text: text.into(),
-            language: Type::Stem(language),
-        });
-    }
-
-    pub fn index_raw(&mut self, field: T, text: impl Into<Cow<'x, str>>) {
-        self.parts.push(Text {
-            field,
-            text: text.into(),
-            language: Type::Tokenize,
-        });
-    }
-
-    pub fn index_raw_token(&mut self, field: T, text: impl Into<Cow<'x, str>>) {
-        self.parts.push(Text {
-            field,
-            text: text.into(),
-            language: Type::Static,
-        });
-    }
-}
-
-impl<'x, T: Into<u8> + Display> IntoOperations for FtsIndexBuilder<'x, T> {
-    fn build(self, batch: &mut BatchBuilder) {
-        let mut detect = LanguageDetector::new();
-        let mut tokens: VecMap<u8, AHashSet<String>> = VecMap::new();
-        let mut parts = Vec::new();
-
-        for text in self.parts {
-            match text.language {
-                Type::Stem(language) => {
-                    let language = if language == Language::Unknown {
-                        detect.detect(&text.text, MIN_LANGUAGE_SCORE)
-                    } else {
-                        language
-                    };
-                    parts.push((text.field, language, text.text));
-                }
-                Type::Tokenize => {
-                    let tokens = tokens.get_mut_or_insert(text.field.into());
-                    for token in SpaceTokenizer::new(text.text.as_ref(), MAX_TOKEN_LENGTH) {
-                        tokens.insert(token);
-                    }
-                }
-                Type::Static => {
-                    tokens
-                        .get_mut_or_insert(text.field.into())
-                        .insert(text.text.into_owned());
-                }
-            }
-        }
-
-        let default_language = detect
-            .most_frequent_language()
-            .unwrap_or(self.default_language);
-        let mut term_index = TermIndexBuilder::new();
-        let mut ops = AHashSet::new();
-
-        for (part_id, (field, language, text)) in parts.into_iter().enumerate() {
-            let language = if language != Language::Unknown {
-                language
-            } else {
-                default_language
-            };
-            let mut terms = Vec::new();
-            let field: u8 = field.into();
-
-            for token in Stemmer::new(&text, language, MAX_TOKEN_LENGTH).collect::<Vec<_>>() {
-                ops.insert(Operation::hash(&token.word, HASH_EXACT, field, true));
-                if let Some(stemmed_word) = &token.stemmed_word {
-                    ops.insert(Operation::hash(stemmed_word, HASH_STEMMED, field, true));
-                }
-                terms.push(term_index.add_stemmed_token(token));
-            }
-
-            if !terms.is_empty() {
-                term_index.add_terms(field, part_id as u32, terms);
-            }
-        }
-
-        for (field, tokens) in tokens {
-            let mut terms = Vec::with_capacity(tokens.len());
-            for token in tokens {
-                ops.insert(Operation::hash(&token, HASH_EXACT, field, true));
-                terms.push(term_index.add_token(Token {
-                    word: token.into(),
-                    from: 0,
-                    to: 0,
-                }));
-            }
-            term_index.add_terms(field, 0, terms);
-        }
-
-        for op in ops {
-            batch.ops.push(op);
-        }
-
-        batch.ops.push(Operation::Value {
-            class: ValueClass::Property {
-                field: u8::MAX,
-                family: u8::MAX,
-            },
-            set: term_index.serialize().into(),
-        });
-    }
-}
-
-impl TokenIndex {
-    fn build_index(self, batch: &mut BatchBuilder, set: bool) {
-        let mut ops = AHashSet::with_capacity(self.tokens.len() * 2);
-        for term in self.terms {
-            for (term_ids, is_exact) in [(term.exact_terms, true), (term.stemmed_terms, false)] {
-                for term_id in term_ids {
-                    if let Some(word) = self.tokens.get(term_id as usize) {
-                        ops.insert(Operation::hash(
-                            word,
-                            if is_exact { HASH_EXACT } else { HASH_STEMMED },
-                            term.field_id,
-                            set,
-                        ));
-                    }
-                }
-            }
-        }
-        for op in ops {
-            batch.ops.push(op);
-        }
-    }
-}
-
-impl IntoOperations for TokenIndex {
-    fn build(self, batch: &mut BatchBuilder) {
-        self.build_index(batch, false);
-        batch.ops.push(Operation::Value {
-            class: ValueClass::Property {
-                field: u8::MAX,
-                family: u8::MAX,
-            },
-            set: None,
-        });
-    }
-}
-
-impl IntoOperations for RawValue<TokenIndex> {
-    fn build(self, batch: &mut BatchBuilder) {
-        self.inner.build_index(batch, true);
-        batch.ops.push(Operation::Value {
-            class: ValueClass::Property {
-                field: u8::MAX,
-                family: u8::MAX,
-            },
-            set: self.raw.into(),
-        });
-    }
-}
-
-pub trait ToTokens {
-    fn to_tokens(&self) -> HashSet<String>;
-}
-
-impl ToTokens for &str {
-    fn to_tokens(&self) -> HashSet<String> {
-        let mut tokens = HashSet::new();
-        for token in SpaceTokenizer::new(self, MAX_TOKEN_LENGTH) {
-            tokens.insert(token);
-        }
-        tokens
-    }
-}
-
-impl ToTokens for &String {
-    fn to_tokens(&self) -> HashSet<String> {
-        self.as_str().to_tokens()
-    }
-}
diff --git a/crates/store/src/fts/index.rs b/crates/store/src/fts/index.rs
new file mode 100644
index 00000000..1493fdcd
--- /dev/null
+++ b/crates/store/src/fts/index.rs
@@ -0,0 +1,372 @@
+/*
+ * Copyright (c) 2023 Stalwart Labs Ltd.
+ *
+ * This file is part of the Stalwart Mail Server.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as
+ * published by the Free Software Foundation, either version 3 of
+ * the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ * in the LICENSE file at the top-level directory of this distribution.
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ * You can be released from the requirements of the AGPLv3 license by
+ * purchasing a commercial license. Please contact licensing@stalw.art
+ * for more details.
+*/
+
+use std::{borrow::Cow, fmt::Display};
+
+use ahash::{AHashMap, AHashSet};
+use nlp::{
+    language::{
+        detect::{LanguageDetector, MIN_LANGUAGE_SCORE},
+        stemmer::Stemmer,
+        Language,
+    },
+    tokenizers::word::WordTokenizer,
+};
+
+use crate::{
+    backend::MAX_TOKEN_LENGTH,
+    write::{
+        hash::TokenType, key::KeySerializer, BatchBuilder, BitmapClass, BitmapHash, Operation,
+        ValueClass,
+    },
+    Deserialize, Error, Store, ValueKey, U64_LEN,
+};
+
+use super::Field;
+
+#[derive(Debug)]
+struct Text<'x, T: Into<u8> + Display + Clone + std::fmt::Debug> {
+    field: Field<T>,
+    text: Cow<'x, str>,
+    typ: Type,
+}
+
+#[derive(Debug)]
+enum Type {
+    Text(Language),
+    Tokenize,
+    Keyword,
+}
+
+#[derive(Debug)]
+pub struct FtsDocument<'x, T: Into<u8> + Display + Clone + std::fmt::Debug> {
+    parts: Vec<Text<'x, T>>,
+    default_language: Language,
+    account_id: u32,
+    collection: u8,
+    document_id: u32,
+}
+
+impl<'x, T: Into<u8> + Display + Clone + std::fmt::Debug> FtsDocument<'x, T> {
+    pub fn with_default_language(default_language: Language) -> FtsDocument<'x, T> {
+        FtsDocument {
+            parts: vec![],
+            default_language,
+            account_id: 0,
+            document_id: 0,
+            collection: 0,
+        }
+    }
+
+    pub fn with_account_id(mut self, account_id: u32) -> Self {
+        self.account_id = account_id;
+        self
+    }
+
+    pub fn with_document_id(mut self, document_id: u32) -> Self {
+        self.document_id = document_id;
+        self
+    }
+
+    pub fn with_collection(mut self, collection: impl Into<u8>) -> Self {
+        self.collection = collection.into();
+        self
+    }
+
+    pub fn index(&mut self, field: Field<T>, text: impl Into<Cow<'x, str>>, language: Language) {
+        self.parts.push(Text {
+            field,
+            text: text.into(),
+            typ: Type::Text(language),
+        });
+    }
+
+    pub fn index_tokenized(&mut self, field: Field<T>, text: impl Into<Cow<'x, str>>) {
+        self.parts.push(Text {
+            field,
+            text: text.into(),
+            typ: Type::Tokenize,
+        });
+    }
+
+    pub fn index_keyword(&mut self, field: Field<T>, text: impl Into<Cow<'x, str>>) {
+        self.parts.push(Text {
+            field,
+            text: text.into(),
+            typ: Type::Keyword,
+        });
+    }
+}
+
+impl<T: Into<u8> + Display + Clone + std::fmt::Debug> From<Field<T>> for u8 {
+    fn from(value: Field<T>) -> Self {
+        match value {
+            Field::Body => 0,
+            Field::Attachment => 1,
+            Field::Keyword => 2,
+            Field::Header(value) => 3 + value.into(),
+        }
+    }
+}
+
+impl Store {
+    pub async fn fts_index<T: Into<u8> + Display + Clone + std::fmt::Debug>(
+        &self,
+        document: FtsDocument<'_, T>,
+    ) -> crate::Result<()> {
+        let mut detect = LanguageDetector::new();
+        let mut tokens: AHashMap<BitmapHash, AHashSet<u8>> = AHashMap::new();
+        let mut parts = Vec::new();
+
+        for text in document.parts {
+            match text.typ {
+                Type::Text(language) => {
+                    let language = if language == Language::Unknown {
+                        detect.detect(&text.text, MIN_LANGUAGE_SCORE)
+                    } else {
+                        language
+                    };
+                    parts.push((text.field, language, text.text));
+                }
+                Type::Tokenize => {
+                    let field = u8::from(text.field);
+                    for token in WordTokenizer::new(text.text.as_ref(), MAX_TOKEN_LENGTH) {
+                        tokens
+                            .entry(BitmapHash::new(token.word.as_ref()))
+                            .or_default()
+                            .insert(TokenType::word(field));
+                    }
+                }
+                Type::Keyword => {
+                    let field = u8::from(text.field);
+                    tokens
+                        .entry(BitmapHash::new(text.text.as_ref()))
+                        .or_default()
+                        .insert(TokenType::word(field));
+                }
+            }
+        }
+
+        let default_language = detect
+            .most_frequent_language()
+            .unwrap_or(document.default_language);
+
+        for (field, language, text) in parts.into_iter() {
+            let language = if language != Language::Unknown {
+                language
+            } else {
+                default_language
+            };
+            let field: u8 = field.into();
+
+            let mut last_token = Cow::Borrowed("");
+            for token in Stemmer::new(&text, language, MAX_TOKEN_LENGTH) {
+                if !last_token.is_empty() {
+                    tokens
+                        .entry(BitmapHash::new(&format!("{} {}", last_token, token.word)))
+                        .or_default()
+                        .insert(TokenType::bigram(field));
+                }
+
+                tokens
+                    .entry(BitmapHash::new(token.word.as_ref()))
+                    .or_default()
+                    .insert(TokenType::word(field));
+
+                if let Some(stemmed_word) = token.stemmed_word {
+                    tokens
+                        .entry(BitmapHash::new(stemmed_word.as_ref()))
+                        .or_default()
+                        .insert(TokenType::stemmed(field));
+                }
+
+                last_token = token.word;
+            }
+        }
+
+        if tokens.is_empty() {
+            return Ok(());
+        }
+
+        // Serialize tokens
+        let mut serializer = KeySerializer::new(tokens.len() * U64_LEN * 2);
+        let mut keys = Vec::with_capacity(tokens.len());
+
+        for (hash, fields) in tokens.into_iter() {
+            serializer = serializer
+                .write(hash.hash.as_slice())
+                .write(hash.len)
+                .write(fields.len() as u8);
+            for field in fields.into_iter() {
+                serializer = serializer.write(field);
+                keys.push(Operation::Bitmap {
+                    class: BitmapClass::Text { field, token: hash },
+                    set: true,
+                });
+            }
+        }
+
+        // Write term index
+        let mut batch = BatchBuilder::new();
+        batch
+            .with_account_id(document.account_id)
+            .with_collection(document.collection)
+            .update_document(document.document_id)
+            .set(
+                ValueClass::TermIndex,
+                lz4_flex::compress_prepend_size(&serializer.finalize()),
+            );
+        self.write(batch.build()).await?;
+        let mut batch = BatchBuilder::new();
+        batch
+            .with_account_id(document.account_id)
+            .with_collection(document.collection)
+            .update_document(document.document_id);
+
+        for (pos, key) in keys.into_iter().enumerate() {
+            if pos > 0 && pos & 1023 == 0 {
+                self.write(batch.build()).await?;
+                batch = BatchBuilder::new();
+                batch
+                    .with_account_id(document.account_id)
+                    .with_collection(document.collection)
+                    .update_document(document.document_id);
+            }
+            batch.ops.push(key);
+        }
+
+        if !batch.is_empty() {
+            self.write(batch.build()).await?;
+        }
+
+        Ok(())
+    }
+
+    pub async fn fts_remove(
+        &self,
+        account_id: u32,
+        collection: u8,
+        document_id: u32,
+    ) -> crate::Result<bool> {
+        // Obtain term index
+        let term_index = if let Some(term_index) = self
+            .get_value::<TermIndex>(ValueKey {
+                account_id,
+                collection,
+                document_id,
+                class: ValueClass::TermIndex,
+            })
+            .await?
+        {
+            term_index
+        } else {
+            return Ok(false);
+        };
+
+        // Remove keys
+        let mut batch = BatchBuilder::new();
+        batch
+            .with_account_id(account_id)
+            .with_collection(collection)
+            .update_document(document_id);
+
+        for (pos, key) in term_index.ops.into_iter().enumerate() {
+            if pos > 0 && pos & 1023 == 0 {
+                self.write(batch.build()).await?;
+                batch = BatchBuilder::new();
+                batch
+                    .with_account_id(account_id)
+                    .with_collection(collection)
+                    .update_document(document_id);
+            }
+            batch.ops.push(key);
+        }
+
+        if !batch.is_empty() {
+            self.write(batch.build()).await?;
+        }
+
+        // Remove term index
+        let mut batch = BatchBuilder::new();
+        batch
+            .with_account_id(account_id)
+            .with_collection(collection)
+            .update_document(document_id)
+            .clear(ValueClass::TermIndex);
+
+        self.write(batch.build()).await?;
+
+        Ok(true)
+    }
+
+    pub async fn fts_remove_all(&self, _: u32) -> crate::Result<()> {
+        // No-op
+        // Term indexes are stored in the same key range as the document
+
+        Ok(())
+    }
+}
+
+struct TermIndex {
+    ops: Vec<Operation>,
+}
+
+impl Deserialize for TermIndex {
+    fn deserialize(bytes: &[u8]) -> crate::Result<Self> {
+        let bytes = lz4_flex::decompress_size_prepended(bytes)
+            .map_err(|_| Error::InternalError("Failed to decompress term index".to_string()))?;
+        let mut ops = Vec::new();
+        let mut bytes = bytes.iter().peekable();
+
+        while bytes.peek().is_some() {
+            let mut hash = BitmapHash {
+                hash: [0; 8],
+                len: 0,
+            };
+
+            for byte in hash.hash.iter_mut() {
+                *byte = *bytes.next().ok_or(Error::InternalError(
+                    "Unexpected EOF reading term index".to_string(),
+                ))?;
+            }
+
+            hash.len = *bytes.next().ok_or(Error::InternalError(
+                "Unexpected EOF reading term index".to_string(),
+            ))?;
+            let num_fields = *bytes.next().ok_or(Error::InternalError(
+                "Unexpected EOF reading term index".to_string(),
+            ))?;
+            for _ in 0..num_fields {
+                let field = *bytes.next().ok_or(Error::InternalError(
+                    "Unexpected EOF reading term index".to_string(),
+                ))?;
+                ops.push(Operation::Bitmap {
+                    class: BitmapClass::Text { field, token: hash },
+                    set: false,
+                });
+            }
+        }
+
+        Ok(Self { ops })
+    }
+}
diff --git a/crates/store/src/fts/mod.rs b/crates/store/src/fts/mod.rs
index 8761f076..cb38f1e9 100644
--- a/crates/store/src/fts/mod.rs
+++ b/crates/store/src/fts/mod.rs
@@ -21,55 +21,188 @@
  * for more details.
 */
 
-use crate::{
-    write::{BitmapFamily, Operation},
-    BitmapKey, Serialize, BM_HASH,
-};
+use std::fmt::Display;
 
-use self::{bloom::hash_token, builder::MAX_TOKEN_MASK};
+use nlp::language::Language;
 
-pub mod bloom;
-pub mod builder;
+pub mod index;
 pub mod query;
-pub mod search_snippet;
-pub mod term_index;
-
-impl BitmapKey<Vec<u8>> {
-    pub fn hash(word: &str, account_id: u32, collection: u8, family: u8, field: u8) -> Self {
-        BitmapKey {
-            account_id,
-            collection,
-            family: BM_HASH | family | (word.len() & MAX_TOKEN_MASK) as u8,
-            field,
-            block_num: 0,
-            key: hash_token(word),
+
+#[derive(Clone, Debug)]
+pub enum Field<T: Into<u8> + Display + Clone + std::fmt::Debug> {
+    Header(T),
+    Body,
+    Attachment,
+    Keyword,
+}
+
+#[derive(Debug)]
+pub enum FtsFilter<T: Into<u8> + Display + Clone + std::fmt::Debug> {
+    Exact {
+        field: Field<T>,
+        text: String,
+        language: Language,
+    },
+    Contains {
+        field: Field<T>,
+        text: String,
+        language: Language,
+    },
+    Keyword {
+        field: Field<T>,
+        text: String,
+    },
+    And,
+    Or,
+    Not,
+    End,
+}
+
+impl<T: Into<u8> + Display + Clone + std::fmt::Debug> FtsFilter<T> {
+    pub fn has_text_detect(
+        field: Field<T>,
+        text: impl Into<String>,
+        default_language: Language,
+    ) -> Self {
+        let (text, language) = Language::detect(text.into(), default_language);
+        Self::has_text(field, text, language)
+    }
+
+    pub fn has_text(field: Field<T>, text: impl Into<String>, language: Language) -> Self {
+        let text = text.into();
+        if !matches!(language, Language::None) && (text.starts_with('"') && text.ends_with('"'))
+            || (text.starts_with('\'') && text.ends_with('\''))
+        {
+            FtsFilter::Exact {
+                field,
+                text,
+                language,
+            }
+        } else {
+            FtsFilter::Contains {
+                field,
+                text,
+                language,
+            }
         }
     }
 
-    pub fn value(
-        account_id: u32,
-        collection: impl Into<u8>,
-        field: impl Into<u8>,
-        value: impl BitmapFamily + Serialize,
-    ) -> Self {
-        BitmapKey {
-            account_id,
-            collection: collection.into(),
-            family: value.family(),
-            field: field.into(),
-            block_num: 0,
-            key: value.serialize(),
+    pub fn has_keyword(field: Field<T>, text: impl Into<String>) -> Self {
+        FtsFilter::Keyword {
+            field,
+            text: text.into(),
         }
     }
+
+    pub fn has_english_text(field: Field<T>, text: impl Into<String>) -> Self {
+        Self::has_text(field, text, Language::English)
+    }
 }
 
-impl Operation {
-    pub fn hash(word: &str, family: u8, field: u8, set: bool) -> Self {
-        Operation::Bitmap {
-            family: BM_HASH | family | (word.len() & MAX_TOKEN_MASK) as u8,
-            field,
-            key: hash_token(word),
-            set,
+#[derive(Clone, Copy)]
+pub enum FilterType {
+    And,
+    Or,
+    Not,
+    End,
+    Store,
+    Fts,
+}
+
+pub enum FilterGroup<T: FilterItem> {
+    Fts(Vec<T>),
+    Store(T),
+}
+
+pub trait FilterItem: Clone {
+    fn filter_type(&self) -> FilterType;
+}
+
+pub trait IntoFilterGroup<T: FilterItem + From<FilterType>> {
+    fn into_filter_group(self) -> Vec<FilterGroup<T>>;
+}
+
+impl<T: FilterItem + From<FilterType>> IntoFilterGroup<T> for Vec<T> {
+    fn into_filter_group(self) -> Vec<FilterGroup<T>> {
+        let mut filter = Vec::with_capacity(self.len());
+        let mut iter = self.into_iter();
+        let mut logical_op = None;
+
+        while let Some(item) = iter.next() {
+            if matches!(item.filter_type(), FilterType::Fts) {
+                let mut store_item = None;
+                let mut depth = 0;
+                let mut fts = Vec::with_capacity(5);
+
+                // Add the logical operator if there is one
+                let in_logical_op = if let Some(op) = logical_op.take() {
+                    fts.push(op);
+                    true
+                } else {
+                    false
+                };
+                fts.push(item);
+
+                for item in iter.by_ref() {
+                    match item.filter_type() {
+                        FilterType::And | FilterType::Or | FilterType::Not => {
+                            depth += 1;
+                            fts.push(item);
+                        }
+                        FilterType::End if depth > 0 => {
+                            depth -= 1;
+                            fts.push(item);
+                        }
+                        FilterType::Fts => {
+                            fts.push(item);
+                        }
+                        _ => {
+                            store_item = Some(item);
+                            break;
+                        }
+                    }
+                }
+
+                if in_logical_op {
+                    fts.push(T::from(FilterType::End));
+                }
+
+                if depth > 0 {
+                    let mut store = Vec::with_capacity(depth * 2);
+                    while depth > 0 {
+                        let item = fts.pop().unwrap();
+                        if matches!(
+                            item.filter_type(),
+                            FilterType::And | FilterType::Or | FilterType::Not
+                        ) {
+                            depth -= 1;
+                        }
+                        store.push(FilterGroup::Store(item));
+                    }
+
+                    filter.push(FilterGroup::Fts(fts));
+                    filter.extend(store);
+                } else {
+                    filter.push(FilterGroup::Fts(fts));
+                }
+
+                if let Some(item) = store_item {
+                    filter.push(FilterGroup::Store(item));
+                }
+            } else {
+                match item.filter_type() {
+                    FilterType::And | FilterType::Or => {
+                        logical_op = Some(item.clone());
+                    }
+                    FilterType::Not => {
+                        logical_op = Some(T::from(FilterType::And));
+                    }
+                    _ => {}
+                }
+                filter.push(FilterGroup::Store(item));
+            }
         }
+
+        filter
     }
 }
diff --git a/crates/store/src/fts/query.rs b/crates/store/src/fts/query.rs
index 37938e3f..6be67a75 100644
--- a/crates/store/src/fts/query.rs
+++ b/crates/store/src/fts/query.rs
@@ -21,138 +21,210 @@
  * for more details.
 */
 
-use std::ops::BitOrAssign;
+use std::{
+    fmt::Display,
+    ops::{BitAndAssign, BitOrAssign, BitXorAssign},
+};
 
-use nlp::language::{stemmer::Stemmer, Language};
+use nlp::language::stemmer::Stemmer;
 use roaring::RoaringBitmap;
 
-use crate::{fts::builder::MAX_TOKEN_LENGTH, BitmapKey, ValueKey, HASH_EXACT, HASH_STEMMED};
+use crate::{backend::MAX_TOKEN_LENGTH, fts::FtsFilter, write::BitmapClass, BitmapKey, Store};
 
-use super::term_index::TermIndex;
+struct State<T: Into<u8> + Display + Clone + std::fmt::Debug> {
+    pub op: FtsFilter<T>,
+    pub bm: Option<RoaringBitmap>,
+}
 
-#[async_trait::async_trait]
-pub trait StoreFts: StoreRead {
-    async fn fts_query(
-        &mut self,
+impl Store {
+    pub async fn fts_query<T: Into<u8> + Display + Clone + std::fmt::Debug>(
+        &self,
         account_id: u32,
-        collection: u8,
-        field: u8,
-        text: &str,
-        language: Language,
-        match_phrase: bool,
-    ) -> crate::Result<Option<RoaringBitmap>> {
-        if match_phrase {
-            let mut phrase = Vec::new();
-            let mut bit_keys = Vec::new();
-            for token in language.tokenize_text(text, MAX_TOKEN_LENGTH) {
-                let key = BitmapKey::hash(
-                    token.word.as_ref(),
-                    account_id,
-                    collection,
-                    HASH_EXACT,
+        collection: impl Into<u8>,
+        filters: Vec<FtsFilter<T>>,
+    ) -> crate::Result<RoaringBitmap> {
+        let collection = collection.into();
+        let mut not_mask = RoaringBitmap::new();
+        let mut not_fetch = false;
+
+        let mut state: State<T> = FtsFilter::And.into();
+        let mut stack = Vec::new();
+        let mut filters = filters.into_iter().peekable();
+
+        while let Some(filter) = filters.next() {
+            let mut result = match filter {
+                FtsFilter::Exact {
                     field,
-                );
-                if !bit_keys.contains(&key) {
-                    bit_keys.push(key);
+                    text,
+                    language,
+                } => {
+                    let field: u8 = field.clone().into();
+
+                    let tokens = language
+                        .tokenize_text(text.as_ref(), MAX_TOKEN_LENGTH)
+                        .map(|t| t.word)
+                        .collect::<Vec<_>>();
+                    let keys = if tokens.len() > 1 {
+                        tokens
+                            .windows(2)
+                            .map(|bg| BitmapKey {
+                                account_id,
+                                collection,
+                                class: BitmapClass::bigram(format!("{} {}", bg[0], bg[1]), field),
+                                block_num: 0,
+                            })
+                            .collect::<Vec<_>>()
+                    } else {
+                        tokens
+                            .into_iter()
+                            .map(|word| BitmapKey {
+                                account_id,
+                                collection,
+                                class: BitmapClass::word(word.as_ref(), field),
+                                block_num: 0,
+                            })
+                            .collect::<Vec<_>>()
+                    };
+
+                    self.get_bitmaps_intersection(keys).await?
                 }
+                FtsFilter::Contains {
+                    field,
+                    text,
+                    language,
+                } => {
+                    let mut result = RoaringBitmap::new();
+                    let field: u8 = field.clone().into();
 
-                phrase.push(token.word);
-            }
-            let bitmaps = match self.get_bitmaps_intersection(bit_keys).await? {
-                Some(b) if !b.is_empty() => b,
-                _ => return Ok(None),
-            };
+                    for token in Stemmer::new(text.as_ref(), language, MAX_TOKEN_LENGTH) {
+                        let token1 = BitmapKey {
+                            account_id,
+                            collection,
+                            class: BitmapClass::word(token.word.as_ref(), field),
+                            block_num: 0,
+                        };
+                        let token2 = BitmapKey {
+                            account_id,
+                            collection,
+                            class: BitmapClass::stemmed(
+                                if let Some(stemmed_word) = token.stemmed_word {
+                                    stemmed_word
+                                } else {
+                                    token.word
+                                }
+                                .as_ref(),
+                                field,
+                            ),
+                            block_num: 0,
+                        };
 
-            match phrase.len() {
-                0 => return Ok(None),
-                1 => return Ok(Some(bitmaps)),
-                _ => (),
-            }
+                        match self.get_bitmaps_union(vec![token1, token2]).await? {
+                            Some(b) if !b.is_empty() => {
+                                if !result.is_empty() {
+                                    result &= b;
+                                    if result.is_empty() {
+                                        break;
+                                    }
+                                } else {
+                                    result = b;
+                                }
+                            }
+                            _ => break,
+                        }
+                    }
 
-            let mut results = RoaringBitmap::new();
-            for document_id in bitmaps {
-                if let Some(term_index) = self
-                    .get_value::<TermIndex>(ValueKey::term_index(
+                    if !result.is_empty() {
+                        Some(result)
+                    } else {
+                        None
+                    }
+                }
+                FtsFilter::Keyword { field, text } => {
+                    self.get_bitmap(BitmapKey {
                         account_id,
                         collection,
-                        document_id,
-                    ))
+                        class: BitmapClass::word(text, field),
+                        block_num: 0,
+                    })
                     .await?
-                {
-                    if term_index
-                        .match_terms(
-                            &phrase
-                                .iter()
-                                .map(|w| term_index.get_match_term(w, None))
-                                .collect::<Vec<_>>(),
-                            field.into(),
-                            true,
-                            false,
-                            false,
-                        )
-                        .map_err(|e| {
-                            crate::Error::InternalError(format!(
-                                "TermIndex match_terms failed for {account_id}/{collection}/{document_id}: {e:?}"
-                            ))
-                        })?
-                        .is_some()
-                    {
-                        results.insert(document_id);
+                }
+                op @ (FtsFilter::And | FtsFilter::Or | FtsFilter::Not) => {
+                    stack.push(state);
+                    state = op.into();
+                    continue;
+                }
+                FtsFilter::End => {
+                    if let Some(prev_state) = stack.pop() {
+                        let bm = state.bm;
+                        state = prev_state;
+                        bm
+                    } else {
+                        break;
                     }
-                } else {
-                    tracing::debug!(
-                        event = "error",
-                        context = "fts_query",
-                        account_id = account_id,
-                        collection = collection,
-                        document_id = document_id,
-                        "Document is missing a term index",
-                    );
                 }
-            }
+            };
 
-            if !results.is_empty() {
-                Ok(Some(results))
-            } else {
-                Ok(None)
+            // Only fetch not mask if we need it
+            if matches!(state.op, FtsFilter::Not) && !not_fetch {
+                not_mask = self
+                    .get_bitmap(BitmapKey::document_ids(account_id, collection))
+                    .await?
+                    .unwrap_or_else(RoaringBitmap::new);
+                not_fetch = true;
             }
-        } else {
-            let mut bitmaps = RoaringBitmap::new();
-
-            for token in Stemmer::new(text, language, MAX_TOKEN_LENGTH) {
-                let token1 =
-                    BitmapKey::hash(&token.word, account_id, collection, HASH_EXACT, field);
-                let token2 = if let Some(stemmed_word) = token.stemmed_word {
-                    BitmapKey::hash(&stemmed_word, account_id, collection, HASH_STEMMED, field)
-                } else {
-                    let mut token2 = token1.clone();
-                    token2.family &= !HASH_EXACT;
-                    token2.family |= HASH_STEMMED;
-                    token2
-                };
-
-                match self.get_bitmaps_union(vec![token1, token2]).await? {
-                    Some(b) if !b.is_empty() => {
-                        if !bitmaps.is_empty() {
-                            bitmaps &= b;
-                            if bitmaps.is_empty() {
-                                return Ok(None);
-                            }
+
+            // Apply logical operation
+            if let Some(dest) = &mut state.bm {
+                match state.op {
+                    FtsFilter::And => {
+                        if let Some(result) = result {
+                            dest.bitand_assign(result);
                         } else {
-                            bitmaps = b;
+                            dest.clear();
+                        }
+                    }
+                    FtsFilter::Or => {
+                        if let Some(result) = result {
+                            dest.bitor_assign(result);
+                        }
+                    }
+                    FtsFilter::Not => {
+                        if let Some(mut result) = result {
+                            result.bitxor_assign(&not_mask);
+                            dest.bitand_assign(result);
                         }
                     }
-                    _ => return Ok(None),
-                };
+                    _ => unreachable!(),
+                }
+            } else if let Some(ref mut result_) = result {
+                if let FtsFilter::Not = state.op {
+                    result_.bitxor_assign(&not_mask);
+                }
+                state.bm = result;
+            } else if let FtsFilter::Not = state.op {
+                state.bm = Some(not_mask.clone());
+            } else {
+                state.bm = Some(RoaringBitmap::new());
             }
 
-            Ok(Some(bitmaps))
+            // And short circuit
+            if matches!(state.op, FtsFilter::And) && state.bm.as_ref().unwrap().is_empty() {
+                while let Some(filter) = filters.peek() {
+                    if matches!(filter, FtsFilter::End) {
+                        break;
+                    } else {
+                        filters.next();
+                    }
+                }
+            }
         }
+
+        Ok(state.bm.unwrap_or_default())
     }
 
-    async fn get_bitmaps_union<T: AsRef<[u8]> + Sync + Send>(
+    async fn get_bitmaps_union(
         &self,
-        keys: Vec<BitmapKey<T>>,
+        keys: Vec<BitmapKey<BitmapClass>>,
     ) -> crate::Result<Option<RoaringBitmap>> {
         let mut bm = RoaringBitmap::new();
 
@@ -165,3 +237,12 @@ pub trait StoreFts: StoreRead {
         Ok(if !bm.is_empty() { Some(bm) } else { None })
     }
 }
+
+impl<T: Into<u8> + Display + Clone + std::fmt::Debug> From<FtsFilter<T>> for State<T> {
+    fn from(value: FtsFilter<T>) -> Self {
+        Self {
+            op: value,
+            bm: None,
+        }
+    }
+}
diff --git a/crates/store/src/fts/search_snippet.rs b/crates/store/src/fts/search_snippet.rs
deleted file mode 100644
index 55d6b6b7..00000000
--- a/crates/store/src/fts/search_snippet.rs
+++ /dev/null
@@ -1,275 +0,0 @@
-/*
- * Copyright (c) 2023, Stalwart Labs Ltd.
- *
- * This file is part of Stalwart Mail Server.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License as
- * published by the Free Software Foundation, either version 3 of
- * the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Affero General Public License for more details.
- * in the LICENSE file at the top-level directory of this distribution.
- * You should have received a copy of the GNU Affero General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- * You can be released from the requirements of the AGPLv3 license by
- * purchasing a commercial license. Please contact licensing@stalw.art
- * for more details.
-*/
-
-use super::term_index::Term;
-
-fn escape_char(c: char, string: &mut String) {
-    match c {
-        '&' => string.push_str("&amp;"),
-        '<' => string.push_str("&lt;"),
-        '>' => string.push_str("&gt;"),
-        '"' => string.push_str("&quot;"),
-        '\n' | '\r' => string.push(' '),
-        _ => string.push(c),
-    }
-}
-
-fn escape_char_len(c: char) -> usize {
-    match c {
-        '&' => "&amp;".len(),
-        '<' => "&lt;".len(),
-        '>' => "&gt;".len(),
-        '"' => "&quot;".len(),
-        '\r' | '\n' => 1,
-        _ => c.len_utf8(),
-    }
-}
-
-pub fn generate_snippet(terms: &[Term], text: &str) -> Option<String> {
-    let mut snippet = String::with_capacity(text.len());
-    let start_offset = terms.get(0)?.offset as usize;
-
-    if start_offset > 0 {
-        let mut word_count = 0;
-        let mut from_offset = 0;
-        let mut last_is_space = false;
-
-        if text.len() > 240 {
-            for (pos, char) in text.get(0..start_offset)?.char_indices().rev() {
-                // Add up to 2 words or 40 characters of context
-                if char.is_whitespace() {
-                    if !last_is_space {
-                        word_count += 1;
-                        if word_count == 3 {
-                            break;
-                        }
-                        last_is_space = true;
-                    }
-                } else {
-                    last_is_space = false;
-                }
-                from_offset = pos;
-                if start_offset - from_offset >= 40 {
-                    break;
-                }
-            }
-        }
-
-        last_is_space = false;
-        for char in text.get(from_offset..start_offset)?.chars() {
-            if !char.is_whitespace() {
-                last_is_space = false;
-            } else {
-                if last_is_space {
-                    continue;
-                }
-                last_is_space = true;
-            }
-            escape_char(char, &mut snippet);
-        }
-    }
-
-    let mut terms = terms.iter().peekable();
-
-    'outer: while let Some(term) = terms.next() {
-        if snippet.len() + ("<mark>".len() * 2) + term.len as usize + 1 > 255 {
-            break;
-        }
-
-        snippet.push_str("<mark>");
-        snippet.push_str(text.get(term.offset as usize..term.offset as usize + term.len as usize)?);
-        snippet.push_str("</mark>");
-
-        let next_offset = if let Some(next_term) = terms.peek() {
-            next_term.offset as usize
-        } else {
-            text.len()
-        };
-
-        let mut last_is_space = false;
-        for char in text
-            .get(term.offset as usize + term.len as usize..next_offset)?
-            .chars()
-        {
-            if !char.is_whitespace() {
-                last_is_space = false;
-            } else {
-                if last_is_space {
-                    continue;
-                }
-                last_is_space = true;
-            }
-
-            if snippet.len() + escape_char_len(char) <= 255 {
-                escape_char(char, &mut snippet);
-            } else {
-                break 'outer;
-            }
-        }
-    }
-
-    Some(snippet)
-}
-
-#[cfg(test)]
-mod tests {
-
-    use nlp::language::Language;
-
-    use crate::{
-        fts::term_index::{TermIndex, TermIndexBuilder},
-        Deserialize, Serialize,
-    };
-
-    use super::*;
-
-    #[test]
-    fn search_snippets() {
-        let inputs = [
-            (vec![
-                "Help a friend from Abidjan Côte d'Ivoire",
-                concat!(
-                "When my mother died when she was given birth to me, my father took me so ", 
-                "special because I am motherless. Before the death of my late father on 22nd June ",
-                "2013 in a private hospital here in Abidjan Côte d'Ivoire. He secretly called me on his ",
-                "bedside and told me that he has a sum of $7.5M (Seven Million five Hundred ",
-                "Thousand Dollars) left in a suspense account in a local bank here in Abidjan Côte ",
-                "d'Ivoire, that he used my name as his only daughter for the next of kin in deposit of ",
-                "the fund. ",
-                "I am 24year old. Dear I am honorably seeking your assistance in the following ways. ",
-                "1) To provide any bank account where this money would be transferred into. ",
-                "2) To serve as the guardian of this fund. ",
-                "3) To make arrangement for me to come over to your country to further my ",
-                "education and to secure a residential permit for me in your country. ",
-                "Moreover, I am willing to offer you 30 percent of the total sum as compensation for ",
-                "your effort input after the successful transfer of this fund to your nominated ",
-                "account overseas."
-            )],
-                vec![
-                    (
-                        vec!["côte"], 
-                        vec![
-                            "Help a friend from Abidjan <mark>Côte</mark> d'Ivoire", 
-                            concat!(
-                            "in Abidjan <mark>Côte</mark> d'Ivoire. He secretly called me on his bedside ",
-                            "and told me that he has a sum of $7.5M (Seven Million five Hundred Thousand ",
-                            "Dollars) left in a suspense account in a local bank here in Abidjan ",
-                            "<mark>Côte</mark> d'Ivoire, that ")
-                        ]
-                    ),
-                    (
-                        vec!["your", "country"], 
-                        vec![
-                            concat!(
-                            "honorably seeking <mark>your</mark> assistance in the following ways. ", 
-                            "1) To provide any bank account where this money would be transferred into. 2) ",
-                            "To serve as the guardian of this fund. 3) To make arrangement for me to come ",
-                            "over to <mark>your</mark> "
-                            )]
-                    ),
-                    (
-                        vec!["overseas"], 
-                        vec![
-                            "nominated account <mark>overseas</mark>."
-                        ]
-                    ),
-
-                ],
-            ),
-            (vec![
-                "孫子兵法",
-                concat!(
-                "<\"孫子兵法：\">",
-                "孫子曰：兵者，國之大事，死生之地，存亡之道，不可不察也。", 
-                "孫子曰：凡用兵之法，馳車千駟，革車千乘，帶甲十萬；千里饋糧，則內外之費賓客之用，膠漆之材，",
-                "車甲之奉，日費千金，然後十萬之師舉矣。",
-                "孫子曰：凡用兵之法，全國為上，破國次之；全旅為上，破旅次之；全卒為上，破卒次之；全伍為上，破伍次之。",
-                "是故百戰百勝，非善之善者也；不戰而屈人之兵，善之善者也。",
-                "孫子曰：昔之善戰者，先為不可勝，以待敵之可勝，不可勝在己，可勝在敵。故善戰者，能為不可勝，不能使敵必可勝。",
-                "故曰：勝可知，而不可為。",
-                "兵者，詭道也。故能而示之不能，用而示之不用，近而示之遠，遠而示之近。利而誘之，亂而取之，實而備之，強而避之，",
-                "怒而撓之，卑而驕之，佚而勞之，親而離之。攻其無備，出其不意，此兵家之勝，不可先傳也。",
-                "夫未戰而廟算勝者，得算多也；未戰而廟算不勝者，得算少也；多算勝，少算不勝，而況於無算乎？吾以此觀之，勝負見矣。",
-                "孫子曰：凡治眾如治寡，分數是也。鬥眾如鬥寡，形名是也。三軍之眾，可使必受敵而無敗者，奇正是也。兵之所加，",
-                "如以碬投卵者，虛實是也。",
-            )],
-                vec![
-                    (
-                        vec!["孫子兵法"], 
-                        vec![
-                            "<mark>孫子兵法</mark>", 
-                            concat!(
-                            "&lt;&quot;<mark>孫子兵法</mark>：&quot;&gt;孫子曰：兵者，國之大事，死生之地，存亡之道，",
-                            "不可不察也。孫子曰：凡用兵之法，馳車千駟，革車千乘，帶甲十萬；千里饋糧，則內外之費賓客之用，膠"),
-                        ]
-                    ),
-                    (
-                        vec!["孫子曰"], 
-                        vec![
-                            concat!(
-                            "&lt;&quot;孫子兵法：&quot;&gt;<mark>孫子曰</mark>：兵者，國之大事，死生之地，存亡之道，", 
-                            "不可不察也。<mark>孫子曰</mark>：凡用兵之法，馳車千駟，革車千乘，帶甲十萬；千里饋糧，則內外之費賓",
-                            )]
-                    ),
-                ],
-            ),
-        ];
-
-        for (parts, tests) in inputs {
-            let mut builder = TermIndexBuilder::new();
-
-            for (field_num, part) in parts.iter().enumerate() {
-                let mut terms = Vec::new();
-                for token in Language::English.tokenize_text(part, 40) {
-                    terms.push(builder.add_token(token));
-                }
-                builder.add_terms(field_num as u8, 0, terms);
-            }
-
-            let compressed_term_index = builder.serialize();
-            let term_index = TermIndex::deserialize(&compressed_term_index[..]).unwrap();
-
-            for (match_words, snippets) in tests {
-                let mut match_terms = Vec::new();
-                for word in &match_words {
-                    match_terms.push(term_index.get_match_term(word, None));
-                }
-
-                let term_groups = term_index
-                    .match_terms(&match_terms, None, false, true, true)
-                    .unwrap()
-                    .unwrap();
-
-                assert_eq!(term_groups.len(), snippets.len());
-
-                for (term_group, snippet) in term_groups.iter().zip(snippets.iter()) {
-                    assert_eq!(
-                        snippet,
-                        &generate_snippet(&term_group.terms, parts[term_group.field_id as usize])
-                            .unwrap()
-                    );
-                }
-            }
-        }
-    }
-}
diff --git a/crates/store/src/fts/term_index.rs b/crates/store/src/fts/term_index.rs
index 2b876578..3ec6e7e8 100644
--- a/crates/store/src/fts/term_index.rs
+++ b/crates/store/src/fts/term_index.rs
@@ -23,7 +23,7 @@
 
 use std::{borrow::Cow, convert::TryInto};
 
-use crate::{Deserialize, Serialize};
+use crate::{Deserialize, Serialize, U32_LEN, U64_LEN};
 
 use ahash::{AHashMap, AHashSet};
 use bitpacking::{BitPacker, BitPacker1x, BitPacker4x, BitPacker8x};