changelog shortlog graph tags branches changeset files revisions annotate raw help

Mercurial > core / lisp/lib/dat/xml/xml.lisp

changeset 698: 96958d3eb5b0
parent: aac665e2f5bf
author: Richard Westhaver <ellis@rwest.io>
date: Fri, 04 Oct 2024 22:04:59 -0400
permissions: -rw-r--r--
description: fixes
1 ;;; lib/dat/xml.lisp --- XML Data Format
2 
3 ;; based on the re-implementation of https://github.com/rpgoldman/xmls
4 
5 ;; our nodes are called XML-NODE and inherit from OBJ/TREE:NODE.
6 
7 ;; XMLS:NODE-NAME == OBJ/TREE:NODE-KEY
8 
9 ;;; Code:
10 (in-package :dat/xml)
11 
12 ;;; XMLS
13 
14 ;;;-----------------------------------------------------------------------------
15 ;;; GLOBAL SETTINGS
16 ;;;-----------------------------------------------------------------------------
17 (defvar *strip-comments* t)
18 (defvar *compress-whitespace* t)
19 (defvar *discard-processing-instructions*)
20 
21 (eval-when (:compile-toplevel :load-toplevel :execute)
22  (declaim (type vector *entities*))
23  (defvar *entities*
24  #(("lt;" #\<)
25  ("gt;" #\>)
26  ("amp;" #\&)
27  ("apos;" #\')
28  ("quot;" #\")))
29  (defvar *whitespace* (remove-duplicates
30  '(#\Newline #\Space #\Tab #\Return #\Linefeed))))
31 (defvar *char-escapes*
32  (let ((table (make-array 256 :element-type 'string :initial-element "")))
33  (loop
34  for code from 0 to 255
35  for char = (code-char code)
36  for entity = (first (find char *entities* :test #'char= :key #'second))
37  do (setf (svref table code)
38  (cond
39  (entity
40  (concatenate 'string "&" entity))
41  ((and (or (< code 32) (> code 126))
42  (not (= code 10))
43  (not (= code 9)))
44  (format nil "&#x~x;" code))
45  (t
46  (format nil "~x" char))))
47  finally (return table))
48  table))
49 
50 ;;;---------------------------------------------------------------------------
51 ;;; DYNAMIC VARIABLES
52 ;;;---------------------------------------------------------------------------
53 (defvar *parser-stream* nil
54  "The currently-being-parsed stream. Used so that we can appropriately track
55 the line number.")
56 (defvar *parser-line-number* nil)
57 
58 
59 
60 ;;;-----------------------------------------------------------------------------
61 ;;; CONDITIONS
62 ;;;-----------------------------------------------------------------------------
63 (define-condition xml-parse-error (error)
64  ((line :initarg :line
65  :initform nil
66  :reader error-line))
67  (:report (lambda (xpe stream)
68  (format stream "XML-PARSE-ERROR~@[ at line ~d~]"
69  (error-line xpe)))))
70 
71 (defmethod initialize-instance :after ((obj xml-parse-error) &key)
72  (unless (slot-value obj 'line)
73  (when *parser-line-number*
74  (setf (slot-value obj 'line) *parser-line-number*))))
75 
76 ;;;-----------------------------------------------------------------------------
77 ;;; NODE INTERFACE
78 ;;;-----------------------------------------------------------------------------
79 (defstruct (xml-node (:constructor %make-xml-node))
80  name
81  ns
82  attrs
83  children)
84 
85 (defun make-xml-node (&key name ns attrs child children)
86  "Convenience function for creating a new xml node."
87  (when (and child children)
88  (error "Cannot specify both :child and :children for MAKE-NODE."))
89  (let ((children (if child
90  (list child)
91  children)))
92  (%make-xml-node :name name :ns ns
93  :children children
94  :attrs attrs)))
95 
96 ;;;---------------------------------------------------------------------------
97 ;;; XML Processing Instruction
98 ;;;---------------------------------------------------------------------------
99 (defstruct proc-inst
100  (target "" :type string)
101  (contents "" :type string)
102  )
103 
104 
105 ;;;-----------------------------------------------------------------------------
106 ;;; UTILITY FUNCTIONS
107 ;;;-----------------------------------------------------------------------------
108 (defun compress-whitespace (str)
109  (if *compress-whitespace*
110  (progn
111  (setf str (string-trim *whitespace* str))
112  (if (= 0 (length str))
113  nil
114  str))
115  str))
116 
117 (defun write-escaped (string stream)
118  (write-string (escape-for-html string) stream))
119 
120 (defun escape-for-html (string)
121  "Escapes the characters #\\<, #\\>, #\\', #\\\", and #\\& for HTML output."
122  (with-output-to-string (out)
123  (with-input-from-string (in string)
124  (loop for char = (read-char in nil nil)
125  while char
126  do (case char
127  ((#\<) (write-string "&lt;" out))
128  ((#\>) (write-string "&gt;" out))
129  ((#\") (write-string "&quot;" out))
130  ((#\') (write-string "&#039;" out))
131  ((#\&) (write-string "&amp;" out))
132  (otherwise (write-char char out)))))))
133 
134 (defun make-extendable-string (&optional (size 10))
135  "Creates an adjustable string with a fill pointer."
136  (make-array size
137  :element-type 'character
138  :adjustable t
139  :fill-pointer 0))
140 
141 (defun push-string (c string)
142  "Shorthand function for adding characters to an extendable string."
143  (vector-push-extend c string))
144 
145 (defun translate-raw-value (raw-value)
146  "Helper function for xml generation."
147  (etypecase raw-value
148  (string raw-value)
149  (symbol (symbol-name raw-value))
150  (integer (format nil "~D" raw-value))
151  (float (format nil "~G" raw-value))))
152 
153 (defun generate-xml (e s indent)
154  "Renders a lisp node tree to an xml string stream."
155  (if (> indent 0) (incf indent))
156  (etypecase e
157  (xml-node
158  (progn
159  (dotimes (i (* 2 (- indent 2)))
160  (write-char #\Space s))
161  (format s "<~A~@[ xmlns=\"~A\"~]" (xml-node-name e) (xml-node-ns e))
162  (loop for a in (xml-node-attrs e)
163  do (progn
164  (write-char #\Space s)
165  (write-string (first a) s)
166  (write-char #\= s)
167  (write-char #\" s)
168  (write-escaped (translate-raw-value (second a)) s)
169  (write-char #\" s))))
170  (if (null (xml-node-children e))
171  (progn
172  (write-string "/>" s)
173  (if (> indent 0) (write-char #\Newline s)))
174  (progn
175  (write-char #\> s)
176  (if (> indent 0) (write-char #\Newline s))
177  (mapc #'(lambda (c) (generate-xml c s indent)) (xml-node-children e))
178  (if (> indent 0)
179  (progn
180  (dotimes (i (* 2 (- indent 2)))
181  (write-char #\Space s))))
182  (format s "</~A>" (xml-node-name e))
183  (if (> indent 0) (write-char #\Newline s)))))
184  (number
185  (generate-xml (translate-raw-value e) s indent))
186  (symbol
187  (generate-xml (translate-raw-value e) s indent))
188  (string
189  (progn
190  (if (> indent 0)
191  (progn
192  (dotimes (i (* 2 (- indent 2)))
193  (write-char #\Space s))))
194  (write-escaped e s)
195  (if (> indent 0) (write-char #\Newline s))))))
196 
197 ;;;-----------------------------------------------------------------------------
198 ;;; PARSER STATE & LOOKAHEAD
199 ;;;-----------------------------------------------------------------------------
200 (defstruct state
201  "Represents parser state. Passed among rules to avoid threading issues."
202  (got-doctype nil)
203  (lines 1 :type integer)
204  nsstack
205  stream)
206 
207 (defun resolve-entity (ent)
208  "Resolves the xml entity ENT to a character. Numeric entities are
209 converted using CODE-CHAR, which only works in implementations that
210 internally encode strings in US-ASCII, ISO-8859-1 or UCS."
211  (declare (type simple-string ent))
212  (or (and (>= (length ent) 2)
213  (char= (char ent 0) #\#)
214  (code-char
215  (if (char= (char ent 1) #\x)
216  (parse-integer ent :start 2 :end (- (length ent) 1) :radix 16)
217  (parse-integer ent :start 1 :end (- (length ent) 1)))))
218  (second (find ent *entities* :test #'string= :key #'first))
219  (error "Unable to resolve entity ~S" ent)))
220 
221 (declaim (inline peek-stream))
222 (defun peek-stream (stream)
223  "Looks one character ahead in the input stream. Serves as a potential hook for
224 character translation."
225  (peek-char nil stream nil))
226 
227 (defun read-stream (stream)
228  "Reads a character from the stream, translating entities as it goes."
229  (let ((c (read-char stream nil)))
230  (if (and c (not (char= c #\&)))
231  c
232  (loop with ent = (make-extendable-string 5)
233  for char = (read-char stream)
234  do (push-string char ent)
235  until (char= char #\;)
236  finally (return (resolve-entity (coerce ent 'simple-string)))))))
237 
238 ;;;---------------------------------------------------------------------------
239 ;;; Shadow READ-CHAR and UNREAD-CHAR so we can count lines while we parse...
240 ;;;---------------------------------------------------------------------------
241 (defun read-char (&optional (stream *standard-input*) (eof-error-p t) eof-value recursive-p)
242  (let ((eof-p nil))
243  (let ((c
244  (catch 'char-return
245  (handler-bind
246  ((end-of-file
247  #'(lambda (e)
248  (declare (ignore e))
249  (unless eof-error-p
250  (setf eof-p t)
251  (throw 'char-return eof-value)))))
252  (common-lisp:read-char stream t nil recursive-p)))))
253  (when (and (eq stream *parser-stream*)
254  (not eof-p)
255  (char= c #\newline))
256  (incf *parser-line-number*))
257  c)))
258 
259 (defun unread-char (char &optional (stream *standard-input*))
260  (when (char= char #\newline)
261  (decf *parser-line-number*))
262  (common-lisp:unread-char char stream))
263 
264 ;;;END shadowing--------------------------------------------------------------
265 
266 (define-symbol-macro next-char (peek-stream (state-stream s)))
267 
268 (defmacro eat ()
269  "Consumes one character from the input stream."
270  `(read-char (state-stream s)))
271 
272 (defmacro puke (char)
273  "The opposite of EAT."
274  `(unread-char ,char (state-stream s)))
275 
276 (defmacro match (&rest matchers)
277  "Attempts to match the next input character with one of the supplied matchers."
278  `(let ((c (peek-stream (state-stream s))))
279  (and c
280  (or ,@(loop for m in matchers
281  collect (etypecase m
282  (standard-char `(char= ,m c))
283  (symbol `(,m c)))))
284  ;; cheat here a little bit - eat entire char entity instead
285  ;; of peeked char
286  (read-stream (state-stream s)))))
287 
288 (defmacro match-seq (&rest sequence)
289  "Tries to match the supplied matchers in sequence with characters in the input stream."
290  `(and ,@(loop for s in sequence
291  collect `(match ,s))))
292 
293 (defmacro match* (&rest sequence)
294  "Matches any occurances of any of the supplied matchers."
295  `(loop with data = (make-extendable-string 10)
296  for c = (match ,@sequence)
297  while c
298  do (push-string c data)
299  finally (return data)))
300 
301 (defmacro match+ (&rest sequence)
302  "Matches one or more occurances of any of the supplied matchers."
303  `(and (peek ,@sequence)
304  (match* ,@sequence)))
305 
306 (defmacro peek (&rest matchers)
307  "Looks ahead for an occurance of any of the supplied matchers."
308  `(let ((c (peek-stream (state-stream s))))
309  (or ,@(loop for m in matchers
310  collect (etypecase m
311  (standard-char `(char= ,m c))
312  (symbol `(,m c)))))))
313 
314 (defmacro must (&rest body)
315  "Throws a parse error if the supplied forms do not succeed."
316  `(or (progn ,@body)
317  (error 'xml-parse-error)))
318 
319 ;;;-----------------------------------------------------------------------------
320 ;;; PARSER INTERNAL FUNCTIONS
321 ;;;-----------------------------------------------------------------------------
322 (defstruct element
323  "Common return type of all rule functions."
324  (type nil :type symbol)
325  (val nil))
326 
327 (defun resolve-namespace (elem env)
328  "Maps the ns prefix to its associated url via the supplied ns env."
329  (let ((ns (xml-node-ns elem)))
330  (dolist (e env)
331  (let ((nsurl (assoc ns e :test #'string=)))
332  (and nsurl
333  (setf (xml-node-ns elem) (cadr nsurl))
334  (return ns))))))
335 
336 ;;;-----------------------------------------------------------------------------
337 ;;; MATCH AND RULE BUILDING UTILITIES
338 ;;;-----------------------------------------------------------------------------
339 (defmacro defmatch (name &rest body)
340  "Match definition macro that provides a common lexical environment for matchers."
341  `(defun ,name (c)
342  ,@body))
343 
344 (defmacro defrule (name &rest body)
345  "Rule definition macro that provides a common lexical environment for rules."
346  `(defun ,name (s)
347  ,@body))
348 
349 (defmacro matchfn (name)
350  "Convenience macro for creating an anonymous function wrapper around a matcher macro."
351  `(lambda (s) (match ,name)))
352 
353 (defun none-or-more (s func)
354  "Collects any matches of the supplied rule with the input stream."
355  (declare (type function func))
356  (let ((val (funcall func s)))
357  (if val
358  (multiple-value-bind (res nextval)
359  (none-or-more s func)
360  (values res (cons val nextval)))
361  (values t nil))))
362 
363 (defun one-or-more (s func)
364  "Collects one or more matches of the supplied rule with the input stream."
365  (declare (type function func))
366  (let ((val (funcall func s)))
367  (if val
368  (multiple-value-bind (res nextval)
369  (none-or-more s func)
370  (declare (ignore res))
371  (cons val nextval))
372  nil)))
373 
374 ;;;-----------------------------------------------------------------------------
375 ;;; MATCHERS
376 ;;;-----------------------------------------------------------------------------
377 (defmatch digit ()
378  (and c (digit-char-p c)))
379 
380 (defmatch letter ()
381  (and c (alpha-char-p c)))
382 
383 ;; Modified because *whitespace* is not defined at compile
384 ;; time. [2004/08/31:rpg]
385 (defmatch ws-char ()
386  (member c *whitespace*))
387 ;;; (case c
388 ;;; (#.*whitespace* t)
389 ;;; (t nil)))
390 
391 (defmatch namechar ()
392  (or
393  (and c (alpha-char-p c))
394  (and c (digit-char-p c))
395  (case c
396  ((#\. #\- #\_ #\:) t))))
397 
398 (defmatch ncname-char ()
399  (or
400  (and c (alpha-char-p c))
401  (and c (digit-char-p c))
402  (case c
403  ((#\. #\- #\_) t))))
404 
405 (defmatch attr-text-dq ()
406  (and c (not (member c (list #\< #\")))))
407 
408 (defmatch attr-text-sq ()
409  (and c (not (member c (list #\< #\')))))
410 
411 (defmatch chardata ()
412  (and c (not (char= c #\<))))
413 
414 (defmatch comment-char ()
415  (and c (not (eql c #\-))))
416 
417 ;;;-----------------------------------------------------------------------------
418 ;;; RULES
419 ;;;-----------------------------------------------------------------------------
420 (defrule ncname ()
421  (and (peek letter #\_)
422  (match+ ncname-char)))
423 
424 (defrule qname ()
425  (let (name suffix)
426  (and
427  (setf name (ncname s))
428  (or
429  (and
430  (match #\:)
431  (setf suffix (ncname s)))
432  t))
433  (values name suffix)))
434 
435 (defrule attr-or-nsdecl ()
436  (let (suffix name val)
437  (and
438  (setf (values name suffix) (qname s))
439  (or
440  (and
441  (progn
442  (match* ws-char)
443  (match #\=))
444  (or
445  (and
446  (progn
447  (match* ws-char)
448  (match #\"))
449  (setf val (match* attr-text-dq))
450  (match #\"))
451  (and
452  (progn
453  (match* ws-char)
454  (match #\'))
455  (setf val (match* attr-text-sq))
456  (match #\'))))
457  t)
458  (if (string= "xmlns" name)
459  (list 'nsdecl suffix val)
460  ;; If SUFFIX is true, then NAME is Prefix and SUFFIX is
461  ;; LocalPart.
462  (if suffix
463  (list 'attr suffix val :attr-ns name)
464  (list 'attr name val))))))
465 
466 (defrule ws ()
467  (and (match+ ws-char)
468  (make-element :type 'whitespace :val nil)))
469 
470 (defrule name ()
471  (and
472  (peek namechar #\_ #\:)
473  (match* namechar)))
474 
475 (defrule ws-attr-or-nsdecl ()
476  (and
477  (ws s)
478  (attr-or-nsdecl s)))
479 
480 (defrule start-tag ()
481  (let (name suffix attrs nsdecls)
482  (and
483  (peek namechar)
484  (setf (values name suffix) (qname s))
485  (multiple-value-bind (res a)
486  (none-or-more s #'ws-attr-or-nsdecl)
487  (mapcar (lambda (x) (if (eq (car x) 'attr)
488  (push (cdr x) attrs)
489  (push (cdr x) nsdecls)))
490  a)
491  res)
492  (or (ws s) t)
493  (values
494  (make-xml-node
495  :name (or suffix name)
496  :ns (and suffix name)
497  :attrs attrs)
498  nsdecls))))
499 
500 (defrule end-tag ()
501  (let (name suffix)
502  (and
503  (match #\/)
504  (setf (values name suffix) (qname s))
505  (or (ws s) t)
506  (match #\>)
507  (make-element :type 'end-tag :val (or suffix name)))))
508 
509 (defrule comment ()
510  (and
511  (match-seq #\! #\- #\-)
512  (progn
513  (loop until (match-seq #\- #\- #\>)
514  do (eat))
515  t)
516  (make-element :type 'comment)))
517 
518 ;;; For the CDATA matching of ]]> I by hand generated an NFA, and then
519 ;;; determinized it (also by hand). Then I did a simpler thing of just pushing
520 ;;; ALL the data onto the data string, and truncating it when done.
521 (defrule comment-or-cdata ()
522  (and
523  (peek #\!)
524  (must (or (comment s)
525  (and
526  (match-seq #\[ #\C #\D #\A #\T #\A #\[)
527  (loop with data = (make-extendable-string 50)
528  with state = 0
529  for char = (eat)
530  do (push-string char data)
531  do (case state
532  (0
533  (case char
534  (#\]
535  (trace! :cdata "State 0 Match #\], go to state {0,1} = 4.")
536  (setf state 4))
537  (otherwise
538  (trace! :cdata "State 0 Non-], go to (remain in) state 0."))))
539  (4 ; {0, 1}
540  (case char
541  (#\]
542  (trace! :cdata "State 4 {0, 1}, match ], go to state {0,1,2} = 5")
543  (setf state 5))
544  (otherwise
545  (trace! :cdata "State 4 {0, 1}, Non-], go to state 0.")
546  (setf state 0))))
547  (5 ; {0, 1, 2}
548  (case char
549  (#\]
550  (trace! :cdata "State 5 {0, 1, 2}, match ], stay in state 5."))
551  (#\>
552  (trace! :cdata "State 5 {0, 1, 2}, match >, finish match and go to state 3.")
553  (setf state 3))
554  (otherwise
555  (trace! :cdata "State 5 {0, 1, 2}, find neither ] nor >; go to state 0.")
556  (setf state 0))))
557  )
558  until (eql state 3)
559  finally (return (make-element
560  :type 'cdata
561  :val (coerce
562  ;; rip the ]]> off the end of the data and return it...
563  (subseq data 0 (- (fill-pointer data) 3))
564  'simple-string)))))))))
565 
566 
567 (declaim (ftype function element)) ; forward decl for content rule
568 (defrule content ()
569  (if (match #\<)
570  (must (or (comment-or-cdata s)
571  (processing-instruction s)
572  (element s)
573  (end-tag s)))
574  (or (let (content)
575  (and (setf content (match+ chardata))
576  (make-element :type 'data :val (compress-whitespace content)))))))
577 
578 (defrule element ()
579  (let (elem children nsdecls end-name)
580  (and
581  ;; parse front end of tag
582  (multiple-value-bind (e n)
583  (start-tag s)
584  (setf elem e)
585  (setf nsdecls n)
586  e)
587  ;; resolve namespaces *before* parsing children
588  (if nsdecls (push nsdecls (state-nsstack s)) t)
589  (or (if (or nsdecls (state-nsstack s))
590  (resolve-namespace elem (state-nsstack s)))
591  t)
592  ;; parse end-tag and children
593  (or
594  (match-seq #\/ #\>)
595  (and
596  (match #\>)
597  (loop for c = (content s)
598  while c
599  do (etypecase c
600  (element (case (element-type c)
601  (end-tag
602  (return (setf end-name (element-val c))))
603  ;; processing instructions may be discarded
604  (pi
605  (unless *discard-processing-instructions*
606  (when (element-val c)
607  (push (element-val c) children))))
608  (t (if (element-val c)
609  (push (element-val c) children)))))))
610  (string= (xml-node-name elem) end-name)))
611  ;; package up new node
612  (progn
613  (setf (xml-node-children elem) (nreverse children))
614  (make-element :type 'elem :val elem)))))
615 
616 (defrule processing-instruction ()
617  (let (name contents)
618  (and
619  (match #\?)
620  (setf name (name s))
621  (not (string= name "xml"))
622  ;; contents of a processing instruction can be arbitrary stuff, as long
623  ;; as it doesn't contain ?>...
624  (setf contents (pi-contents s))
625  ;; if we get here, we have eaten ?> off the input in the course of
626  ;; processing PI-CONTENTS
627  (make-element :type 'pi :val (make-proc-inst :target name :contents contents)))))
628 
629 (defrule pi-contents ()
630  (loop with data = (make-extendable-string 50)
631  with state = 0
632  for char = (eat)
633  do (push-string char data)
634  do (ecase state
635  (0
636  (case char
637  (#\?
638  (trace! :pi-contents "State 0 Match #\?, go to state 1.")
639  (setf state 1))
640  (otherwise
641  (trace! :pi-contents "State 0 ~c, go to (remain in) state 0." char))))
642  (1
643  (case char
644  (#\>
645  (trace! :pi-contents "State 1 Match #\>, done.")
646  (setf state 2))
647  (otherwise
648  (trace! :pi-contents "State 1, ~c, do not match #\>, return to 0." char)
649  (setf state 0)))))
650  until (eql state 2)
651  finally (return (coerce
652  ;; rip the ?> off the end of the data and return it...
653  (subseq data 0 (max 0 (- (fill-pointer data) 2)))
654  'simple-string))))
655 
656 (defrule xmldecl ()
657  (let (name contents)
658  (and
659  (match #\?)
660  (setf name (name s))
661  (string= name "xml")
662  (setf contents (none-or-more s #'ws-attr-or-nsdecl))
663  (match-seq #\? #\>)
664  (make-element :type 'xmldecl :val contents))))
665 
666 (defrule comment-or-doctype ()
667  ;; skip dtd - bail out to comment if it's a comment
668  ;; only match doctype once
669  (and
670  (peek #\!)
671  (or (comment s)
672  (and (not (state-got-doctype s))
673  (must (match-seq #\D #\O #\C #\T #\Y #\P #\E))
674  (loop with level = 1
675  do (case (eat)
676  (#\> (decf level))
677  (#\< (incf level)))
678  until (eq level 0)
679  finally (return t))
680  (setf (state-got-doctype s) t)
681  (make-element :type 'doctype)))))
682 
683 (defrule misc ()
684  (or
685  (ws s)
686  (and (match #\<) (must (or (processing-instruction s)
687  (comment-or-doctype s)
688  (element s))))))
689 
690 (defrule document ()
691  (let (elem)
692  (if (match #\<)
693  (must (or (xmldecl s)
694  (comment-or-doctype s)
695  (setf elem (element s)))))
696  ;; NOTE: I don't understand this: it seems to parse arbitrary crap
697  (unless elem
698  (loop for c = (misc s)
699  while c
700  do (cond ((eql (element-type c) 'elem)
701  (return (setf elem c)))
702  ((and (eql (element-type c) 'pi)
703  (not *discard-processing-instructions*))
704  (return (setf elem c))))))
705 
706  (and elem (element-val elem))))
707 
708 ;;;-----------------------------------------------------------------------------
709 ;;; PUBLIC INTERFACE
710 ;;;-----------------------------------------------------------------------------
711 (defun write-xml (e s &key (indent nil))
712  "Renders a lisp node tree to an xml stream. Indents if indent is non-nil."
713  (if (null s)
714  (toxml e :indent indent)
715  (generate-xml e s (if indent 1 0))))
716 
717 (defun write-prologue (xml-decl doctype s)
718  "Render the leading <?xml ... ?> and <!DOCTYPE ... > tags to an xml stream."
719  (format s "<?xml")
720  (dolist (attrib xml-decl)
721  (format s " ~A=\"~A\"" (car attrib) (cdr attrib)))
722  (format s " ?>~%")
723  (when doctype
724  (format s "<!DOCTYPE ~A>~%" doctype)))
725 
726 (defun write-prolog (xml-decl doctype s)
727  (write-prologue xml-decl doctype s))
728 
729 (defun toxml (e &key (indent nil))
730  "Renders a lisp node tree to an xml string."
731  (with-output-to-string (s)
732  (write-xml e s :indent indent)))
733 
734 (defun xml-parse (s &key (compress-whitespace t) (quash-errors t))
735  "Parses the supplied stream or string into a lisp node tree."
736  (let* ((*compress-whitespace* compress-whitespace)
737  (*discard-processing-instructions* t)
738  (stream
739  (etypecase s
740  (string (make-string-input-stream s))
741  (stream s)))
742  (*parser-stream* stream)
743  (*parser-line-number* 1))
744  (if quash-errors
745  (handler-case
746  (document (make-state :stream stream))
747  (end-of-file () nil)
748  (xml-parse-error () nil))
749  (document (make-state :stream stream)))))
750 
751 ;;; XMLrep
752 (defun make-xmlrep (tag &key (representation-kind :node) namespace attribs children)
753  (case representation-kind
754  ((:list)
755  (cond
756  (namespace
757  (list (list tag namespace) (list attribs) children))
758  (t
759  (list tag (list attribs) children))))
760  ((:node)
761  (make-xml-node :name tag :ns namespace :attrs attribs :children children))
762  (otherwise
763  (error "REPRESENTATION-KIND must be :LIST or :NODE, found ~s" representation-kind))))
764 
765 (defgeneric xmlrep-add-child! (xmlrep child)
766  (:method ((xmlrep xml-node) child)
767  (setf (xml-node-children xmlrep)
768  (append (xml-node-children xmlrep)
769  (list child))))
770  (:method ((xmlrep cons) child)
771  (setf (cddr xmlrep)
772  (append (cddr xmlrep)
773  (list child)))))
774 
775 (defgeneric xmlrep-tag (treenode)
776  (:method ((treenode xml-node))
777  (xml-node-name treenode))
778  (:method ((treenode cons))
779  (let ((tag-name (car treenode)))
780  ;; detect the "namespaced" case
781  (cond
782  ((consp tag-name) (car tag-name))
783  (t tag-name)))))
784 
785 (defun xmlrep-tagmatch (tag treenode)
786  ;;child nodes to XMLREPs could be strings or nodes
787  (unless (stringp treenode)
788  (string-equal tag (xmlrep-tag treenode))))
789 
790 (defgeneric xmlrep-attribs (treenode)
791  (:method ((treenode xml-node))
792  (xml-node-attrs treenode))
793  (:method ((treenode cons))
794  (cadr treenode)))
795 
796 (defgeneric (setf xmlrep-attribs) (attribs treenode)
797  (:argument-precedence-order treenode attribs)
798  (:method (attribs (treenode xml-node))
799  (setf (xml-node-attrs treenode) attribs))
800  (:method (attribs (treenode cons))
801  (setf (cadr treenode) attribs)))
802 
803 (defgeneric xmlrep-children (treenode)
804  (:method ((treenode xml-node))
805  (xml-node-children treenode))
806  (:method ((treenode cons))
807  (cddr treenode)))
808 
809 (defgeneric (setf xmlrep-children) (children treenode)
810  (:argument-precedence-order treenode children)
811  (:method (children (treenode xml-node))
812  (setf (xml-node-children treenode) children))
813  (:method (children (treenode cons))
814  (setf (cddr treenode) children)))
815 
816 (defun xmlrep-string-child (treenode &optional (if-unfound :error))
817  (let ((children (xmlrep-children treenode)))
818  (if (and (eq (length children) 1) (typep (first children) 'string))
819  (first children)
820  (if (eq if-unfound :error)
821  (error "Node does not have a single string child: ~a" treenode)
822  if-unfound)
823  )))
824 
825 (defun xmlrep-integer-child (treenode)
826  (parse-integer (xmlrep-string-child treenode)))
827 
828 (defun xmlrep-find-child-tags (tag treenode)
829  "Find all the children of TREENODE with TAG."
830  (remove-if-not #'(lambda (child) (xmlrep-tagmatch tag child))
831  (xmlrep-children treenode)))
832 
833 (defun xmlrep-find-child-tag (tag treenode
834  &optional (if-unfound :error))
835  "Find a single child of TREENODE with TAG. Returns an error
836 if there is more or less than one such child."
837  (let ((matches (xmlrep-find-child-tags tag treenode)))
838  (case (length matches)
839  (0 (if (eq if-unfound :error)
840  (error "Couldn't find child tag ~A in ~A"
841  tag treenode)
842  if-unfound))
843  (1 (first matches))
844  (otherwise (error "Child tag ~A multiply defined in ~A"
845  tag treenode)))))
846 
847 (defun xmlrep-attrib-value (attrib treenode
848  &optional (if-undefined :error))
849  "Find the value of ATTRIB, a string, in TREENODE.
850 if there is no ATTRIB, will return the value of IF-UNDEFINED,
851 which defaults to :ERROR."
852  (let ((found-attrib (find-attrib attrib treenode)))
853  (cond (found-attrib
854  (second found-attrib))
855  ((eq if-undefined :error)
856  (error "XML attribute ~S undefined in ~S"
857  attrib treenode))
858  (t
859  if-undefined))))
860 
861 (defun find-attrib (attrib treenode)
862  "Returns the attrib CELL (not the attrib value) from
863 TREENODE, if found. This cell will be a list of length 2,
864 the attrib name (a string) and its value."
865  (find attrib (xmlrep-attribs treenode)
866  :test #'string=
867  :key #'car))
868 
869 (defun (setf xmlrep-attrib-value) (value attrib treenode)
870  ;; ideally, we would check this...
871  (let ((old-val (xmlrep-attrib-value attrib treenode nil)))
872  (if old-val
873  (cond ((null value)
874  ;; just delete this attribute...
875  (setf (xmlrep-attribs treenode)
876  (remove attrib (xmlrep-attribs treenode)
877  :test #'string=
878  :key #'first))
879  nil)
880  (t (let ((cell (find-attrib attrib treenode)))
881  (setf (second cell) value)
882  value)))
883  ;; no old value
884  (cond ((null value)
885  nil) ; no old value to delete
886  (t
887  (setf (xmlrep-attribs treenode)
888  (append (xmlrep-attribs treenode)
889  (list (list attrib value))))
890  value)))))
891 
892 (defun xmlrep-boolean-attrib-value (attrib treenode
893  &optional (if-undefined :error))
894  "Find the value of ATTRIB, a string, in TREENODE.
895 The value should be either \"true\" or \"false\". The
896 function will return T or NIL, accordingly. If there is no ATTRIB,
897 will return the value of IF-UNDEFINED, which defaults to :ERROR."
898  (let ((val (xmlrep-attrib-value attrib treenode
899  if-undefined)))
900  (cond ((string-equal val "true")
901  t)
902  ((string-equal val "false") nil)
903  (t (error "Not a boolean value, ~A for attribute ~A."
904  val attrib)))))
905 
906 ;;; XML extraction tool
907 (defun extract-path (key-list xml)
908  "Extracts data from XML parse tree. KEY-LIST is a path for descending down
909 named objects in the XML parse tree. For each KEY-LIST element, XML subforms
910 are searched for a matching tag name. Finally the whole last XML subform on the
911 path is normally returned if found; however the symbol * may be added at the end
912 of KEY-LIST to return list of all objects /enclosed/ by the last subform on
913 KEY-LIST. Also KEY-LIST may be dotted as explained below to return XML tag
914 attributes from the last subform on KEY-LIST.
915 
916 XML is to have the forms as returned by PARSE-TO-LIST or PARSE:
917  (tag-name (attributes-list) subform*),
918  ((tag-name . name-space) (attributes-list) subform*), or
919  #s(node :name tag-name
920  :ns name-space
921  :attrs attributes-list
922  :children subform*)
923 
924 The first element in KEY-LIST must match the top level form in XML.
925 Subsequently each element in the KEY-LIST is to match a subform.
926 
927 An element of KEY-LIST may be a string atom. In that case the first subform
928 with tag-name matching the string is matched. An element of KEY-LIST may also
929 be a list of string atoms in this format:
930  (tag-name (attribute-name attribute-value) ...)
931 
932 The first subform with name matching TAG-NAME /and/ having attributes matching
933 attribute-names and attribute-values is matched. Zero or more attribute/value
934 pairs may be given.
935 
936 Normally the whole subform matching last element in KEY-LIST is returned. The
937 symbol * can be the last element of KEY-LIST to return list of all subforms
938 enclosed by the last matched form. Attributes of last matched subform may be
939 searched by ending KEY-LIST in dot notation, in which case the string after dot
940 matches an attribute name. The two element list of attribute name and value is
941 returned. The symbol * may be used after dot to return the whole attribute list.
942 
943 In the case where the search fails NIL is returned. However it is possible that
944 the search partially succeeds down the key path. Three values are returned
945 altogether and the 2nd and 3rd values give information about how much of
946 KEY-LIST was matched, and at what point in XML:
947  (values RESULT KEY-LIST-FRAGMENT XML-FRAGMENT)
948 
949 When RESULT is non-NIL, the others are NIL. When result is NIL however, the
950 others are:
951  XML-FRAGMENT
952  The last XML form that /did/ match in the key list. It matches the first
953  element of KEY-LIST-FRAGMENT.
954 
955  KEY-LIST-FRAGMENT
956  The /remaining/ part of the KEY-LIST that did not succeed. However the
957  /first/ item on KEY-LIST-FRAGMENT matches the XML-FRAGMENT returned. The
958  failure is at the second item on KEY-LIST-FRAGMENT.
959 
960 In the case of complete failure, where even the very first item on KEY-LIST does not
961 match the top XML form given, all three return values are NIL. (It suffices to check
962 the first two return values.)"
963  (labels ((attribs-match-p ( key-attribs-list xml-attribs-list )
964  ;; search for (attr-name attr-value) pairs from KEY-ATTRIBS-LIST on
965  ;; XML-ATTRIBS-LIST. true if all key pairs found.
966  (loop
967  :with attribs-match-var := t
968  :for attrib-key-pair :in key-attribs-list
969  :do
970  (setq attribs-match-var
971  (and attribs-match-var
972  (find attrib-key-pair xml-attribs-list :test #'equal)))
973  :finally (return attribs-match-var)))
974 
975  (find-test ( key xml-form )
976  ;; test whether the XML-FORM matches KEY
977  (cond
978  ;; just the XML tag name in key
979  ;; XML name is simple string
980  ((and (stringp key)
981  (stringp (xmlrep-tag xml-form)))
982  (string-equal key (xmlrep-tag xml-form)))
983 
984  ;; key form (tag-name (attr-name attr-value) ...)
985  ((and (find-test (car key) xml-form)
986  (attribs-match-p (cdr key) (xmlrep-attribs xml-form))))))
987 
988  (descend ( key-list xml-form )
989  ;; recursive run down KEY-LIST. If XML-FORM runs down to NIL before reaching
990  ;; the end of KEY-LIST, it will be NIL at the end. If not, what is
991  ;; remaining of XML-FORM is the found item.
992  (cond
993  ;; KEY-LIST ends without dotted item, at the target XML form
994  ((null (cdr key-list))
995  (values xml-form nil nil))
996 
997  ;; dotted item at the end of KEY-LIST, search attribute list of target XML form
998  ((atom (cdr key-list))
999  (if (eq '* (cdr key-list))
1000  (values (xmlrep-attribs xml-form) nil nil)
1001  (find (cdr key-list) (xmlrep-attribs xml-form)
1002  :test (lambda (key item) (equal key (car item))))))
1003 
1004  ;; more tag names to match on KEY-LIST
1005  ('t
1006  (if (eq '* (cadr key-list))
1007  (values (xmlrep-children xml-form) nil nil)
1008  (let ((selected-xml-form (find (cadr key-list) (xmlrep-children xml-form)
1009  :test #'find-test)))
1010  (if selected-xml-form
1011  (descend (cdr key-list) selected-xml-form)
1012 
1013  ;; no matching sub-form, indicate what part of KEY-LIST did not match
1014  (values nil key-list xml-form))))))))
1015 
1016  ;; empty list, degenerate usage
1017  (when (null key-list)
1018  (error "KEY-LIST is empty."))
1019 
1020  ;; search down after initial match
1021  (if (find-test (car key-list) xml)
1022  (descend key-list xml)
1023  (values nil nil nil))))
1024