changelog shortlog graph tags branches changeset files revisions annotate raw help

Mercurial > core / lisp/lib/dat/parquet/rle.lisp

changeset 698: 96958d3eb5b0
parent: ec1d4d544c36
author: Richard Westhaver <ellis@rwest.io>
date: Fri, 04 Oct 2024 22:04:59 -0400
permissions: -rw-r--r--
description: fixes
1 ;;; rle.lisp --- Parquet Run Length Encoding
2 
3 ;;
4 
5 ;;; Commentary:
6 
7 #|
8 rle-bit-packed-hybrid: <length> <encoded-data>
9 // length is not always prepended; see the table in the Parquet Encodings specification for the cases in which it is
10 length := length of the <encoded-data> in bytes stored as 4 bytes little endian (unsigned int32)
11 encoded-data := <run>*
12 run := <bit-packed-run> | <rle-run>
13 bit-packed-run := <bit-packed-header> <bit-packed-values>
14 bit-packed-header := varint-encode(<bit-pack-scaled-run-len> << 1 | 1)
15 // we always bit-pack a multiple of 8 values at a time, so we only store the number of values / 8
16 bit-pack-scaled-run-len := (bit-packed-run-len) / 8
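17 bit-packed-run-len := number of bit-packed values; must be in the range [1, 2^31 - 1] (note 3 of the Parquet Encodings specification)
18 bit-packed-values := values packed from the least significant bit of each byte to the most significant bit (note 1 of the Parquet Encodings specification)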
19 rle-run := <rle-header> <repeated-value>
20 rle-header := varint-encode( (rle-run-len) << 1)
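21 rle-run-len := number of times the value repeats; must be in the range [1, 2^31 - 1] (note 3 of the Parquet Encodings specification)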
22 repeated-value := value that is repeated, using a fixed-width of round-up-to-next-byte(bit-width)
23 |#
24 
25 ;; RLE is only supported for the following data types:
26 ;; - Repetition and definition levels
27 ;; - Dictionary indices
28 ;; - Boolean values in data pages, as an alternative to PLAIN encoding
29 
30 ;;; Code:
31 (in-package :dat/parquet)