changeset 698: | 96958d3eb5b0 |
parent: | 8b10eabe89dd |
author: | Richard Westhaver <ellis@rwest.io> |
date: | Fri, 04 Oct 2024 22:04:59 -0400 |
permissions: | -rw-r--r-- |
description: | fixes |
544
ec1d4d544c36
parquet expansion, init leb128, add little-endian octet encoders
Richard Westhaver <ellis@rwest.io>
parents:
diff
changeset
|
1 | ;;; leb128.lisp --- Little-Endian Base 128 Variable Encoding |
ec1d4d544c36
parquet expansion, init leb128, add little-endian octet encoders
Richard Westhaver <ellis@rwest.io>
parents:
diff
changeset
|
2 | |
547 | 3 | ;; (U)LEB128 encoders based on CL-LEB128 |
4 | ||
5 | ;; see https://github.com/mahirvaluj/cl-leb128/blob/main/leb128.lisp |
|
544
ec1d4d544c36
parquet expansion, init leb128, add little-endian octet encoders
Richard Westhaver <ellis@rwest.io>
parents:
diff
changeset
|
6 | |
ec1d4d544c36
parquet expansion, init leb128, add little-endian octet encoders
Richard Westhaver <ellis@rwest.io>
parents:
diff
changeset
|
7 | ;;; Commentary: |
ec1d4d544c36
parquet expansion, init leb128, add little-endian octet encoders
Richard Westhaver <ellis@rwest.io>
parents:
diff
changeset
|
8 | |
ec1d4d544c36
parquet expansion, init leb128, add little-endian octet encoders
Richard Westhaver <ellis@rwest.io>
parents:
diff
changeset
|
9 | ;; ref: https://en.wikipedia.org/wiki/LEB128 |
ec1d4d544c36
parquet expansion, init leb128, add little-endian octet encoders
Richard Westhaver <ellis@rwest.io>
parents:
diff
changeset
|
10 | ;; opt: https://arxiv.org/abs/1503.07387 VByte |
ec1d4d544c36
parquet expansion, init leb128, add little-endian octet encoders
Richard Westhaver <ellis@rwest.io>
parents:
diff
changeset
|
11 | ;; opt: https://arxiv.org/pdf/1709.08990 VByte streaming |
ec1d4d544c36
parquet expansion, init leb128, add little-endian octet encoders
Richard Westhaver <ellis@rwest.io>
parents:
diff
changeset
|
12 | |
ec1d4d544c36
parquet expansion, init leb128, add little-endian octet encoders
Richard Westhaver <ellis@rwest.io>
parents:
diff
changeset
|
13 | ;;; Code: |
ec1d4d544c36
parquet expansion, init leb128, add little-endian octet encoders
Richard Westhaver <ellis@rwest.io>
parents:
diff
changeset
|
14 | (in-package :std/num) |
546 | 15 | |
547 | 16 | (defun encode-leb128 (i) |
17 | "Encode an integer of arbitrary length into a leb128 unsigned-8 buffer" |
|
18 | (let ((more t) (curr) (in 0) (int (make-array |
|
19 | 4 |
|
20 | :fill-pointer 0 |
|
21 | :element-type '(unsigned-byte 8)))) ;(neg (< i 0)) |
|
22 | (declare (fixnum i in)) |
|
23 | (loop while more do |
|
24 | (setf curr (logand i #x7f)) |
|
25 | (setf i (ash i -7)) |
|
26 | (if (or (and (= i 0) (= (logand curr #x40) 0)) |
|
27 | (and (= i -1) (= (logand curr #x40) 64))) |
|
28 | (setf more nil) |
|
29 | (setf curr (logior curr #x80))) |
|
30 | (vector-push-extend curr int) |
|
31 | (incf in)) |
|
32 | (let ((ret (make-array (length int) :element-type '(unsigned-byte 8) :initial-contents int))) |
|
33 | ret))) |
|
34 | ||
35 | (defun read-leb128 (s &optional (start 0)) |
|
36 | "decode signed integer from stream. Returns (values decoded-integer |
|
37 | num-bytes-consumed)" |
|
38 | (declare (fixnum start)) |
|
39 | (when (not (= start 0)) |
|
40 | (loop for i from 0 upto start do (read-byte s))) |
|
41 | (let ((result 0) (shift 0) (curr) (counter 0)) |
|
42 | (declare (fixnum result shift counter)) |
|
43 | (loop do |
|
44 | (setf curr (read-byte s)) |
|
45 | (setf result (logior result (the fixnum (ash (logand curr #x7f) shift)))) |
|
46 | (setf shift (+ 7 shift)) |
|
47 | (incf counter) |
|
48 | (when (= 0 (logand curr #x80)) |
|
49 | (if (= 64 (logand curr #x40)) |
|
50 | (return-from read-leb128 (values (logior result (the fixnum (ash (lognot 0) shift))) counter)) |
|
51 | (return-from read-leb128 (values result counter))))))) |
|
52 | ||
53 | (defun decode-leb128 (buf &optional (start 0)) |
|
54 | "decode signed integer from buffer. Returns (values decoded-integer |
|
55 | num-bytes-consumed)" |
|
56 | (declare (fixnum start) (vector buf)) |
|
57 | (let ((result 0) (shift 0) (curr 0) (counter 0)) |
|
58 | (declare (fixnum result shift counter)) |
|
59 | (loop do |
|
60 | (setf curr (the (unsigned-byte 8) (aref buf start))) |
|
61 | (setf start (+ 1 start)) |
|
62 | (setf result (logior result (the fixnum (ash (logand curr #x7f) shift)))) |
|
63 | (setf shift (+ 7 shift)) |
|
64 | (incf counter) |
|
65 | (when (= 0 (logand curr #x80)) |
|
66 | (if (= 64 (logand curr #x40)) |
|
67 | (return-from decode-leb128 (values (logior result (the fixnum (ash (lognot 0) shift))) counter)) |
|
68 | (return-from decode-leb128 (values result counter))))))) |
|
69 | ||
550
4d34907c69eb
more work on tcompact/thrift, fixed type info in parquet-struct-objects
Richard Westhaver <ellis@rwest.io>
parents:
548
diff
changeset
|
70 | (declaim (ftype (function (integer &optional (unsigned-byte 8)) (array (unsigned-byte 8))) encode-uleb128)) |
548 | 71 | (defun encode-uleb128 (int &optional size) |
72 | "Encode an integer INT as a ULEB128 byte array with SIZE (in bytes)." |
|
550
4d34907c69eb
more work on tcompact/thrift, fixed type info in parquet-struct-objects
Richard Westhaver <ellis@rwest.io>
parents:
548
diff
changeset
|
73 | (declare (integer int)) |
546 | 74 | (let ((more t) (curr) (in 0) (ret (make-array |
548 | 75 | (if size |
76 | size |
|
77 | (if (zerop int) |
|
78 | 1 |
|
79 | (ceiling (/ (log (+ int 1) 2) 7)))) |
|
546 | 80 | :element-type '(unsigned-byte 8)))) ;(neg (< int 0)) |
81 | (loop while more do |
|
82 | (setf curr (logand int #x7f)) |
|
83 | (setf int (ash int -7)) |
|
84 | (if (= int 0) |
|
85 | (setf more nil) |
|
86 | (setf curr (logior curr #x80))) |
|
87 | (setf (aref ret in) curr) |
|
88 | (incf in)) |
|
548 | 89 | ret)) |
546 | 90 | |
563 | 91 | (declaim (ftype (function (vector &optional t) integer) decode-uleb128)) |
547 | 92 | (defun decode-uleb128 (bits &optional (start 0)) |
93 | "Decode an unsigned integer from ULEB128 byte array." |
|
546 | 94 | (let ((result 0) (shift 0) (curr) (counter 0)) |
550
4d34907c69eb
more work on tcompact/thrift, fixed type info in parquet-struct-objects
Richard Westhaver <ellis@rwest.io>
parents:
548
diff
changeset
|
95 | (declare (fixnum shift counter)) |
546 | 96 | (loop do |
547 | 97 | (setf curr (aref bits start)) |
546 | 98 | (setf start (+ 1 start)) |
550
4d34907c69eb
more work on tcompact/thrift, fixed type info in parquet-struct-objects
Richard Westhaver <ellis@rwest.io>
parents:
548
diff
changeset
|
99 | (setf result (logior result (ash (logand curr #x7f) shift))) |
546 | 100 | (setf shift (+ 7 shift)) |
101 | (incf counter) |
|
102 | (when (= 0 (logand curr #x80)) |
|
547 | 103 | (return-from decode-uleb128 (values result counter)))))) |
104 | ||
105 | (defun read-uleb128 (s &optional (start 0)) |
|
106 | "Decode an arbitrarily large unsigned integer from stream. Skip |
|
107 | START number bytes. Return (values integer-decoded |
|
108 | num-bytes-consumed)" |
|
109 | (declare (fixnum start)) |
|
110 | (when (not (= start 0)) |
|
111 | (loop for i from 0 upto start do (read-byte s))) |
|
112 | (let ((result 0) (shift 0) (curr) (counter 0)) |
|
550
4d34907c69eb
more work on tcompact/thrift, fixed type info in parquet-struct-objects
Richard Westhaver <ellis@rwest.io>
parents:
548
diff
changeset
|
113 | (declare (fixnum shift counter)) |
547 | 114 | (loop do |
115 | (setf curr (read-byte s)) |
|
550
4d34907c69eb
more work on tcompact/thrift, fixed type info in parquet-struct-objects
Richard Westhaver <ellis@rwest.io>
parents:
548
diff
changeset
|
116 | (setf result (logior result (ash (logand curr #x7f) shift))) |
547 | 117 | (setf shift (+ 7 shift)) |
118 | (incf counter) |
|
119 | (when (= 0 (logand curr #x80)) |
|
120 | (return-from read-uleb128 (values result counter)))))) |