changelog shortlog graph tags branches changeset files revisions annotate raw help

Mercurial > core / lisp/lib/dat/bencode.lisp

changeset 698: 96958d3eb5b0
parent: 4ba88cac5bc7
author: Richard Westhaver <ellis@rwest.io>
date: Fri, 04 Oct 2024 22:04:59 -0400
permissions: -rw-r--r--
description: fixes
1 ;;; lisp/lib/dat/bencode.lisp --- Bencode Data Format
2 
3 ;; based on https://github.com/nja/cl-bencode/tree/master
4 
5 ;;; Code:
6 (in-package :dat/bencode)
7 
8 ;;; Dictionary
9 (define-condition nonstring-dictionary-key (error)
10  ((key :initarg :key :reader key)))
11 
12 (defun make-dictionary (list)
13  "Makes a dictionary from a plist or alist. Keys must be strings."
14  (let ((dictionary (make-hash-table :test 'equal)))
15  (labels ((add-key-value (key value)
16  (if (stringp key)
17  (setf (gethash key dictionary) value)
18  (restart-case (error 'nonstring-dictionary-key :key key)
19  (skip-key ())
20  (use-value (key)
21  :report "Specify string to use as key"
22  :interactive (lambda ()
23  (format t "Enter a key string: ")
24  (list (read)))
25  (add-key-value key value))))))
26  (if (consp (car list)) ; alist
27  (dolist (cons list dictionary)
28  (destructuring-bind (key . value) cons
29  (add-key-value key value)))
30  (loop for (key value) on list by #'cddr ; plist
31  do (add-key-value key value)))
32  dictionary)))
33 
34 (defparameter *binary-key-p* #'(lambda (x) (equal x '("pieces" "info")))
35  "When decoding dictionary values, this function is passed a list,
36 where the first element is the key of the value. If the dictionary was
37 in turn a dictionary value, that key is the second element of the
38 list, and so on. Should a dictionary be a value in a bencoded list,
39 the corresponding element in the list will be the symbol :list. When
40 the function return a true value, the dictionary value will be
41 binary. Otherwise it will be decoded as a string.
42 
43 The default function in \*binary-key-p\* returns true for the
44 \"pieces\" value in the \"info\" dictionary. All other values are
45 decoded as strings.")
46 
47 (defun get-dictionary (key dictionary)
48  (gethash key dictionary))
49 
50 (defun binary-dictionary-key-p (key)
51  (when (functionp *binary-key-p*)
52  (funcall *binary-key-p* key)))
53 
54 (defun dictionary->alist (dictionary)
55  "Returns an alist representation of the dictionary."
56  (let ((alist))
57  (labels ((add-key-value (key value)
58  (if (stringp key)
59  (push (cons key value) alist)
60  (restart-case (error 'nonstring-dictionary-key :key key)
61  (skip-key ())
62  (use-value (key) :report "Specify string to use as key"
63  (add-key-value key value))))))
64  (maphash #'add-key-value dictionary)
65  (sort alist #'string< :key #'car))))
66 
67 ;;; Encode
68 (defgeneric bencode-encode (object stream &key external-format)
69  (:documentation "Encode object and write it to stream or, if stream
70 is nil, use an in-memory stream and return the resulting sequence.
71 The external-format is used when encoding strings. UTF-8 is the
72 default."))
73 
74 (defmethod bencode-encode (object (stream stream) &key (external-format :utf-8))
75  (if (typep stream 'flexi-stream)
76  (error "No applicable encode method for ~S" object)
77  (bencode-encode object (make-flexi-stream stream :external-format external-format))))
78 
79 (defmethod bencode-encode (object (stream (eql nil)) &key (external-format :utf-8))
80  (with-output-to-sequence (stream)
81  (bencode-encode object (make-flexi-stream stream :external-format external-format))))
82 
83 (defmethod bencode-encode ((list list) (stream flexi-stream) &key &allow-other-keys)
84  (write-byte (char-code #\l) stream)
85  (dolist (x list)
86  (bencode-encode x stream))
87  (write-byte (char-code #\e) stream))
88 
89 (defmethod bencode-encode ((dictionary hash-table) (stream flexi-stream) &key &allow-other-keys)
90  (write-byte (char-code #\d) stream)
91  (dolist (x (dictionary->alist dictionary))
92  (destructuring-bind (k . v) x
93  (bencode-encode k stream)
94  (bencode-encode v stream)))
95  (write-byte (char-code #\e) stream))
96 
97 (defmethod bencode-encode ((string string) (stream flexi-stream) &key &allow-other-keys)
98  (with-accessors ((external-format flexi-stream-external-format))
99  stream
100  (let ((length (octet-length string :external-format external-format)))
101  (write-sequence (string-header length) stream)
102  (write-sequence string stream))))
103 
104 (defmethod bencode-encode ((integer integer) (stream flexi-stream) &key &allow-other-keys)
105  (write-sequence (render-integer integer) stream))
106 
107 (defmethod bencode-encode ((sequence array) (stream flexi-stream) &key &allow-other-keys)
108  (write-sequence (string-header (length sequence)) stream)
109  (write-sequence sequence stream))
110 
111 (defparameter *ascii* (flex:make-external-format :ascii))
112 
113 (defun string-header (length)
114  (string-to-octets (format nil "~a:" length) :external-format *ascii*))
115 
116 (defun render-integer (integer)
117  (string-to-octets (format nil "i~ae" integer) :external-format *ascii*))
118 
119 ;;; Decode
120 defvar *dictionary-keys* nil)
121 
122 (defmacro restart-case-loop (form &body clauses)
123  `(loop (restart-case (return ,form)
124  ,@clauses)))
125 
126 (defgeneric bencode-decode (input &key external-format)
127  (:documentation "Decode a bencode object from a stream or sequence.
128 If input is a flexi-stream, its external-format will be used when
129 decoding strings. If input is a string, all characters must have
130 char-codes that fit in an (unsigned-byte 8). Otherwise, the value of
131 the external-format parameter is used to create a flexi-stream for
132 decoding. The default is UTF-8."))
133 
134 (defmethod bencode-decode ((stream stream) &key (external-format :utf-8))
135  (bencode-decode (make-flexi-stream stream :external-format external-format)))
136 
137 (defmethod bencode-decode ((string string) &key (external-format :utf-8))
138  (bencode-decode (map '(vector (unsigned-byte 8))
139  #'char-code string) :external-format external-format))
140 
141 (defmethod bencode-decode ((sequence sequence) &key (external-format :utf-8))
142  (restart-case-loop (with-input-from-sequence (stream sequence)
143  (bencode-decode (make-flexi-stream stream :external-format external-format)))
144  (retry-sequence (new-external-format)
145  :report "Set external format and retry decoding the sequence from the beginning"
146  :interactive read-external-format
147  (setf external-format new-external-format))))
148 
149 (defmethod bencode-decode ((stream flexi-stream) &key &allow-other-keys)
150  (let ((c (code-char (peek-byte stream))))
151  (case c
152  (#\i (bencode-decode-integer stream))
153  ((#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9)
154  (bencode-decode-string stream))
155  (#\l (bencode-decode-list stream))
156  (#\d (bencode-decode-dictionary stream))
157  (t (error 'invalid-value-type :octet c)))))
158 
159 (define-condition unexpected-octet (error)
160  ((expected-octet :initarg :expected-octet :reader expected-octet)
161  (actual-octet :initarg :actual-octet :reader actual-octet)))
162 
163 (define-condition invalid-value-type (error)
164  ((octet :initarg :octet :reader octet)))
165 
166 (defun must-read-char (stream char)
167  (restart-case
168  (let ((byte (read-byte stream)))
169  (if (eql byte (char-code char))
170  t
171  (error 'unexpected-octet
172  :expected-octet (char-code char)
173  :actual-octet byte)))
174  (continue () t)))
175 
176 (defun maybe-read-char (stream char)
177  (if (eql (peek-byte stream nil t) (char-code char))
178  (code-char (read-byte stream :eof-error-p t))
179  nil))
180 
181 (defun bencode-decode-integer (stream)
182  (must-read-char stream #\i)
183  (let* ((minus (maybe-read-char stream #\-))
184  (integers (read-integers stream))
185  (number (parse-integer integers)))
186  (if (= number 0)
187  (when (or minus (> (length integers) 1))
188  (restart-case (error "Zero must be i0e") (continue ())))
189  (when (char= (elt integers 0) #\0)
190  (restart-case (error "Zero-padded integer") (continue ()))))
191  (must-read-char stream #\e)
192  (if minus
193  (- number)
194  number)))
195 
196 (defun read-integers (stream)
197  (with-output-to-string (string)
198  (loop for octet = (peek-byte stream)
199  while (digit-char-p (code-char octet))
200  do (write-char (code-char (read-byte stream)) string))))
201 
202 (defun read-external-format ()
203  (format t "Enter a flexi-stream external format: ")
204  (multiple-value-list (eval (read))))
205 
206 (defun must-read-octets (stream length)
207  (let* ((array (make-array length :element-type '(unsigned-byte 8)))
208  (read (read-sequence array stream)))
209  (if (= read length)
210  array
211  (restart-case (error "EOF before string end")
212  (continue () (adjust-array array read))))))
213 
214 (defun bencode-decode-string (stream)
215  (with-accessors ((external-format flexi-stream-external-format))
216  stream
217  (let ((length (parse-integer (read-integers stream))))
218  (must-read-char stream #\:)
219  (let ((octets (must-read-octets stream length)))
220  (restart-case-loop (octets-to-string octets :external-format external-format)
221  (use-binary ()
222  :report "Use undecoded binary vector"
223  (return octets))
224  (retry-string (new-external-format)
225  :report "Set external format and continue decoding from the start of the string"
226  :interactive read-external-format
227  (setf external-format new-external-format)))))))
228 
229 (defun bencode-decode-list (stream)
230  (must-read-char stream #\l)
231  (loop until (maybe-read-char stream #\e)
232  collect (let ((*dictionary-keys* (cons :list *dictionary-keys*)))
233  (bencode-decode stream))))
234 
235 (defun bencode-decode-binary-string (stream)
236  (let ((length (parse-integer (read-integers stream))))
237  (must-read-char stream #\:)
238  (must-read-octets stream length)))
239 
240 (defun bencode-decode-dictionary (stream)
241  (must-read-char stream #\d)
242  (loop with list
243  with previous-key
244  until (maybe-read-char stream #\e)
245  do (let ((key (bencode-decode-string stream)))
246  (when (and previous-key (not (string< previous-key key)))
247  (restart-case (error "Key ~S before key ~S in dict" previous-key key)
248  (continue ())))
249  (let* ((*dictionary-keys* (cons key *dictionary-keys*))
250  (value (if (binary-dictionary-key-p *dictionary-keys*)
251  (bencode-decode-binary-string stream)
252  (bencode-decode stream))))
253  (push value list)
254  (push key list)
255  (setf previous-key key)))
256  finally (return (make-dictionary list))))