changelog shortlog graph tags branches changeset files revisions annotate raw help

Mercurial > core / lisp/lib/parse/yacc.lisp

changeset 698: 96958d3eb5b0
parent: 2a4f11c0e8c8
author: Richard Westhaver <ellis@rwest.io>
date: Fri, 04 Oct 2024 22:04:59 -0400
permissions: -rw-r--r--
description: fixes
1 ;;; lib/parse/yacc.lisp --- YACC parser
2 
3 ;; from https://github.com/jech/cl-yacc
4 #|
5 ; Copyright (c) 2005-2009 by Juliusz Chroboczek
6 
7 ; Permission is hereby granted, free of charge, to any person obtaining a copy
8 ; of this software and associated documentation files (the "Software"), to deal
9 ; in the Software without restriction, including without limitation the rights
10 ; to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 ; copies of the Software, and to permit persons to whom the Software is
12 ; furnished to do so, subject to the following conditions:
13 
14 ; The above copyright notice and this permission notice shall be included in
15 ; all copies or substantial portions of the Software.
16 
17 ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 ; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 ; THE SOFTWARE.
24 |#
25 
26 ;;; Code:
27 (in-package #:parse/yacc)
28 
29 (deftype index () '(unsigned-byte 14))
30 (deftype signed-index () '(signed-byte 15))
31 
32 ;;; Productions
33 
34 (defstruct (production
35  (:constructor make-production (symbol derives
36  &key action action-form))
37  (:print-function print-production))
38  (id nil :type (or null index))
39  (symbol (required-argument) :type symbol)
40  (derives (required-argument) :type list)
41  (action #'list :type function)
42  (action-form nil))
43 
44 (defun print-production (p s d)
45  (declare (type production p) (stream s) (ignore d))
46  (print-unreadable-object (p s :type t)
47  (format s "~S -> ~{~S~^ ~}" (production-symbol p) (production-derives p))))
48 
49 (declaim (inline production-equal-p))
50 (defun production-equal-p (p1 p2)
51  "Equality predicate for productions within a single grammar"
52  (declare (type production p1 p2))
53  (eq p1 p2))
54 
55 (declaim (inline production<))
56 (defun production< (p1 p2)
57  "Total order on productions within a single grammar"
58  (declare (type production p1 p2))
59  (< (production-id p1) (production-id p2)))
60 
61  ;;; Grammars
62 
63 (defstruct (grammar (:constructor %make-grammar))
64  (name nil)
65  (terminals '() :type list)
66  (precedence '() :type list)
67  (productions '() :type list)
68  (%symbols :undefined :type (or list (member :undefined)))
69  (derives-epsilon '() :type list)
70  (derives-first '() :type list)
71  (derives-first-terminal '() :type list))
72 
73 (defun make-grammar (&key name (start-symbol (required-argument))
74  terminals precedence productions)
75  (declare (symbol name start-symbol) (list terminals productions))
76  (setq productions
77  (cons (make-production 's-prime (list start-symbol)
78  :action #'identity :action-form '#'identity)
79  productions))
80  (do* ((i 0 (+ i 1)) (ps productions (cdr ps)) (p (car ps) (car ps)))
81  ((null ps))
82  (setf (production-id p) i))
83  (%make-grammar :name name :terminals terminals :precedence precedence
84  :productions productions))
85 
86 (defun grammar-discard-memos (grammar)
87  (setf (grammar-%symbols grammar) :undefined)
88  (setf (grammar-derives-epsilon grammar) '())
89  (setf (grammar-derives-first grammar) '())
90  (setf (grammar-derives-first-terminal grammar) '()))
91 
92 (defun terminal-p (symbol grammar)
93  (declare (symbol symbol) (type grammar grammar))
94  (or (eq symbol 'propagate)
95  (and (member symbol (grammar-terminals grammar)) t)))
96 
97 (defun grammar-symbols (grammar)
98  "The set of symbols (both terminal and nonterminal) of GRAMMAR."
99  (declare (type grammar grammar))
100  (cond
101  ((eq :undefined (grammar-%symbols grammar))
102  (let ((res '()))
103  (dolist (p (grammar-productions grammar))
104  (pushnew (production-symbol p) res)
105  (dolist (s (production-derives p))
106  (pushnew s res)))
107  (setf (grammar-%symbols grammar) res)
108  res))
109  (t (grammar-%symbols grammar))))
110 
111 (defun grammar-epsilon-productions (grammar)
112  (remove-if-not #'(lambda (r) (null (production-derives r)))
113  (grammar-productions grammar)))
114 
115 (defun derives-epsilon (symbol grammar &optional seen)
116  "True if symbol derives epsilon."
117  (declare (symbol symbol) (type grammar grammar) (list seen))
118  (let ((e (assoc symbol (grammar-derives-epsilon grammar))))
119  (cond
120  (e (cdr e))
121  ((terminal-p symbol grammar) nil)
122  ((member symbol seen) nil)
123  (t
124  (let ((res (derives-epsilon* symbol grammar (cons symbol seen))))
125  (when (or res (null seen))
126  (setf (grammar-derives-epsilon grammar)
127  (acons symbol res (grammar-derives-epsilon grammar))))
128  res)))))
129 
130 (defun derives-epsilon* (symbol grammar &optional seen)
131  "Unmemoised version of DERIVES-EPSILON."
132  (declare (symbol symbol) (type grammar grammar) (list seen))
133  (dolist (production (grammar-productions grammar))
134  (when (and (eq symbol (production-symbol production))
135  (every #'(lambda (s) (derives-epsilon s grammar seen))
136  (production-derives production)))
137  (return t))))
138 
139 (defun sequence-derives-epsilon (sequence grammar)
140  "Sequence version of DERIVES-EPSILON*."
141  (declare (list sequence) (type grammar grammar))
142  (every #'(lambda (s) (derives-epsilon s grammar)) sequence))
143 
144 (defun print-derives-epsilon (grammar &optional (stream *standard-output*))
145  (let ((seen '()) (de '()))
146  (dolist (p (grammar-productions grammar))
147  (let ((s (production-symbol p)))
148  (unless (member s seen)
149  (push s seen)
150  (when (derives-epsilon s grammar)
151  (push s de)))))
152  (format stream "~D symbols derive epsilon:~%~S~%~%"
153  (length de) (nreverse de))))
154 
155 (defun derives-first (c grammar &optional seen)
156  "The list of symbols A such that C rm->* A.eta for some eta."
157  (declare (symbol c) (type grammar grammar) (list seen))
158  (let ((e (assoc c (grammar-derives-first grammar))))
159  (cond
160  (e (the list (cdr e)))
161  ((terminal-p c grammar) (list c))
162  ((member c seen) '())
163  (t
164  (let ((derives (list c)))
165  (declare (list derives))
166  (dolist (production (grammar-productions grammar))
167  (when (eq c (production-symbol production))
168  (setq derives
169  (union (sequence-derives-first
170  (production-derives production) grammar
171  (cons c seen))
172  derives))))
173  (when (null seen)
174  (setf (grammar-derives-first grammar)
175  (acons c derives (grammar-derives-first grammar))))
176  derives)))))
177 
178 (defun sequence-derives-first (sequence grammar &optional seen)
179  "Sequence version of DERIVES-FIRST."
180  (declare (list sequence) (type grammar grammar) (list seen))
181  (cond
182  ((null sequence) '())
183  ((terminal-p (car sequence) grammar) (list (car sequence)))
184  (t
185  (let ((d1 (derives-first (car sequence) grammar seen)))
186  (if (derives-epsilon (car sequence) grammar)
187  (union d1 (sequence-derives-first (cdr sequence) grammar seen))
188  d1)))))
189 
190 (defun derives-first-terminal (c grammar &optional seen)
191  "The list of terminals a such that C rm->* a.eta, last non-epsilon."
192  (declare (symbol c) (type grammar grammar))
193  (let ((e (assoc c (grammar-derives-first-terminal grammar))))
194  (cond
195  (e (the list (cdr e)))
196  ((terminal-p c grammar) (list c))
197  ((member c seen) '())
198  (t
199  (let ((derives '()))
200  (declare (list derives))
201  (dolist (production (grammar-productions grammar))
202  (when (eq c (production-symbol production))
203  (setq derives
204  (union
205  (sequence-derives-first-terminal
206  (production-derives production) grammar (cons c seen))
207  derives))))
208  (when (null seen)
209  (push (cons c derives) (grammar-derives-first-terminal grammar)))
210  derives)))))
211 
212 (defun sequence-derives-first-terminal (sequence grammar &optional seen)
213  "Sequence version of DERIVES-FIRST-TERMINAL."
214  (declare (list sequence) (type grammar grammar) (list seen))
215  (cond
216  ((null sequence) '())
217  (t
218  (derives-first-terminal (car sequence) grammar seen))))
219 
220 (defun first-terminals (s grammar)
221  "FIRST(s) without epsilon."
222  (declare (atom s) (type grammar grammar))
223  (cond
224  ((terminal-p s grammar) (list s))
225  (t (remove-if-not #'(lambda (s) (terminal-p s grammar))
226  (derives-first s grammar)))))
227 
228 (defun sequence-first-terminals (s grammar)
229  "Sequence version of FIRST-TERMINALS."
230  (declare (list s) (type grammar grammar))
231  (cond
232  ((null s) '())
233  (t (let ((sf (first-terminals (car s) grammar)))
234  (if (derives-epsilon (car s) grammar)
235  (union sf (sequence-first-terminals (cdr s) grammar))
236  sf)))))
237 
238 (defun print-first-terminals (grammar &optional (stream *standard-output*))
239  "Print FIRST (without epsilon) for all symbols of GRAMMAR."
240  (let ((df '()))
241  (dolist (p (grammar-productions grammar))
242  (let ((s (production-symbol p)))
243  (unless (assoc s df)
244  (push (cons s (first-terminals s grammar)) df))))
245  (format stream "First terminals:~%")
246  (dolist (e (nreverse df))
247  (format stream "~S: ~S~%" (car e) (cdr e)))
248  (format stream "~%")))
249 
250 (defun sequence-first (s grammar)
251  "FIRST(s)."
252  (declare (list s) (type grammar grammar))
253  (let ((sf (sequence-first-terminals s grammar)))
254  (if (sequence-derives-epsilon s grammar)
255  (cons 'epsilon sf)
256  sf)))
257 
258 (defun combine-first (f1 s grammar)
259  "FIRST(s1.s) where f1=FIRST(s1)."
260  (declare (list f1 s) (type grammar grammar))
261  (if (member 'epsilon f1)
262  (union (remove 'epsilon f1) (sequence-first s grammar))
263  f1))
264 
265 (defun relative-first (s a grammar &optional seen)
266  "Union of FIRST(eta) for all the eta s.t. S rm->* Aeta."
267  (declare (symbol s a) (type grammar grammar) (list seen))
268  (cond
269  ((terminal-p s grammar) '())
270  ((member s seen) '())
271  (t (let ((res '()))
272  (when (and (eq s a) (derives-epsilon s grammar))
273  (push 'epsilon res))
274  (dolist (p (grammar-productions grammar))
275  (when (and (eq s (production-symbol p))
276  (not (null (production-derives p))))
277  (setf res
278  (union res
279  (relative-first-sequence
280  (production-derives p)
281  a grammar (cons s seen))))))
282  res))))
283 
284 (defun relative-first-sequence (s a grammar &optional seen)
285  "Sequence version of RELATIVE-FIRST."
286  (declare (list s seen) (symbol a) (type grammar grammar))
287  (cond
288  ((null s) '())
289  ((equal s (list a)) (list 'epsilon))
290  ((not (member a (derives-first (car s) grammar))) '())
291  ((eq (car s) a) (sequence-first (cdr s) grammar))
292  (t (relative-first (car s) a grammar seen))))
293 
294 ;;; Items
295 
296 (defstruct (item
297  (:constructor nil)
298  (:print-function print-item)
299  (:copier %copy-item))
300  (production (required-argument) :type production)
301  (position (required-argument) :type index))
302 
303 (defstruct (lr0-item
304  (:include item)
305  (:constructor make-item (production position))
306  (:conc-name item-))
307  (lookaheads '() :type list))
308 
309 (defstruct (lr1-item
310  (:include item)
311  (:constructor make-lr1-item
312  (production position lookahead))
313  (:conc-name item-))
314  (lookahead (required-argument) :type symbol))
315 
316 (defun print-item (i s d)
317  (declare (type item i) (stream s) (ignore d))
318  (print-unreadable-object (i s :type t)
319  (format s "~S -> ~{~S ~}. ~{~S~^ ~}"
320  (item-symbol i) (item-dot-left i) (item-dot-right i))
321  (when (lr1-item-p i)
322  (format s " (~S)" (item-lookahead i)))))
323 
324 (declaim (inline item-derives item-symbol item-action
325  item-dot-right-p item-dot-right item-dot-symbol
326  item-lr1-equal-p item-lr1-hash-value item-equal-p))
327 
328 (defun item-derives (item)
329  (declare (type item item))
330  (production-derives (item-production item)))
331 
332 (defun item-symbol (item)
333  (declare (type item item))
334  (production-symbol (item-production item)))
335 
336 (defun item-action (item)
337  (declare (type item item))
338  (production-action (item-production item)))
339 
340 (defun item-action-form (item)
341  (declare (type item item))
342  (production-action-form (item-production item)))
343 
344 (defun item-lr1-equal-p (i1 i2)
345  "Equality predicate for LR(1) items."
346  (declare (type lr1-item i1 i2))
347  (or (eq i1 i2)
348  (and (eq (item-production i1) (item-production i2))
349  (= (item-position i1) (item-position i2))
350  (eq (item-lookahead i1) (item-lookahead i2)))))
351 
352 (defun item-equal-p (i1 i2)
353  "Equality predicate for LR(0) items."
354  (declare (type item i1 i2))
355  (or (eq i1 i2)
356  (and (eq (item-production i1) (item-production i2))
357  (= (item-position i1) (item-position i2)))))
358 
359 (defun item-lr1-hash-value (item)
360  "Returns an object suitable for keying associations of LR1-items."
361  (declare (type lr1-item item))
362  (cons (production-id (item-production item))
363  (cons (item-position item)
364  (item-lookahead item))))
365 
366 (defun item< (i1 i2)
367  "Total strict order on LR(0) items."
368  (declare (type item i1 i2))
369  (cond
370  ((eq i1 i2) nil)
371  ((production< (item-production i1) (item-production i2)) t)
372  ((not (eq (item-production i1) (item-production i2))) nil)
373  (t (< (item-position i1) (item-position i2)))))
374 
375 (defun item-set-equal-p (c1 c2)
376  "Equality predicate for sorted sets of LR(0) items."
377  (declare (list c1 c2))
378  (cond
379  ((eq c1 c2) t)
380  (t (do ((d1 c1 (cdr d1)) (d2 c2 (cdr d2)))
381  ((or (eq d1 d2) (null d1) (null d2)) (eq d1 d2))
382  (when (not (item-equal-p (car d1) (car d2)))
383  (return nil))))))
384 
385 (defun item-dot-right-p (item)
386  (declare (type item item))
387  (= (item-position item) (length (item-derives item))))
388 
389 (defun item-dot-symbol (item)
390  (declare (type item item))
391  (nth (item-position item) (item-derives item)))
392 
393 (defun item-dot-left (item)
394  (subseq (item-derives item) 0 (item-position item)))
395 
396 (defun item-dot-right (item &optional (n 0))
397  (declare (type signed-index n))
398  (nthcdr (+ n (item-position item)) (item-derives item)))
399 
400 (defun item-shift (item &optional (n 1))
401  (declare (type lr0-item item) (type signed-index n))
402  (make-item (item-production item) (+ (item-position item) n)))
403 
404 (defun lr1-item-shift (item &optional (n 1))
405  (declare (type lr1-item item) (type signed-index n))
406  (make-lr1-item (item-production item) (+ (item-position item) n)
407  (item-lookahead item)))
408 
409 
410 ;;; Sets of items
411 
412 (defstruct (kernel
413  (:constructor %make-kernel (items))
414  (:print-function print-kernel))
415  (id nil :type (or null index))
416  (items '() :type list)
417  (gotos '() :type list))
418 
419 (defun print-kernel (k s d)
420  (declare (type kernel k) (stream s) (ignore d))
421  (print-unreadable-object (k s :type t)
422  (format s "~{~<~D ~:_~:>~}~_ ~D"
423  (kernel-items k) (length (kernel-gotos k)))
424  (when (kernel-id k)
425  (format s " id=~D" (kernel-id k)))))
426 
427 (defun make-kernel (items &optional kernels)
428  (declare (list items kernels))
429  (let* ((items (sort (copy-list items) #'item<))
430  (k (find items kernels
431  :key #'kernel-items :test #'item-set-equal-p)))
432  (or k (%make-kernel items))))
433 
434 (defun kernel-item (kernel)
435  "The item in a singleton set of items."
436  (declare (type kernel kernel))
437  (assert (null (cdr (kernel-items kernel))))
438  (the lr0-item (car (kernel-items kernel))))
439 
440 ;; Items-closure starts by using a list, and switches to hashtables
441 ;; later. Using some sort of balanced tree would probably be better.
442 
443 (defparameter *items-closure-hash-threshold* 20
444  "The number of elements when items-closure switches to using a hashtable.")
445 (declaim (type index *items-closure-hash-threshold*))
446 
447 (deftype lr1-collection () '(or list hash-table))
448 
449 (defun make-lr1-collection (&optional same-kind-as)
450  (etypecase same-kind-as
451  (list '())
452  (hash-table (make-hash-table :test #'equal))))
453 
454 (defun lr1-collection-empty-p (collection)
455  (declare (type lr1-collection collection))
456  (typecase collection
457  (list (null collection))
458  (hash-table (zerop (hash-table-count collection)))))
459 
460 (defun clear-lr1-collection (collection)
461  (declare (type lr1-collection collection))
462  (typecase collection
463  (list '())
464  (hash-table (clrhash collection))))
465 
466 (defun make-hash-table-from-lr1-list (l)
467  (declare (list l))
468  (let ((h (make-hash-table :test #'equal)))
469  (dolist (item l)
470  (declare (type item item))
471  (setf (gethash (item-lr1-hash-value item) h) item))
472  h))
473 
474 (declaim (inline lr1-find))
475 
476 (defun lr1-find (item collection)
477  "Find an LR(1) item equal to ITEM in COLLECTION, or NIL."
478  (declare (optimize (speed 3) (space 0)))
479  (declare (type item item) (type lr1-collection collection))
480  (typecase collection
481  (list (find item collection :test #'item-lr1-equal-p))
482  (hash-table (gethash (item-lr1-hash-value item) collection))))
483 
484 (defun map-lr1-collection (f collection)
485  "Apply F to all elements of COLLECTION."
486  (declare (type function f) (dynamic-extent f)
487  (type lr1-collection collection))
488  (typecase collection
489  (list (mapcar f collection))
490  (hash-table (maphash #'(lambda (k v) (declare (ignore k)) (funcall f v))
491  collection))))
492 
493 (defmacro do-lr1-collection ((var collection) &body body)
494  (let ((c-name (gensym "COLLECTION")) (f-name (gensym "DO-LR1-BODY")))
495  `(let ((,c-name ,collection))
496  (flet ((,f-name (,var) (declare (type lr1-item ,var)) ,@body))
497  (declare (dynamic-extent #',f-name))
498  (map-lr1-collection #',f-name ,c-name)))))
499 
500 (declaim (inline lr1-add))
501 
502 (defun lr1-add (item collection)
503  "Add ITEM to COLLECTION."
504  (declare (type lr1-item item) (type lr1-collection collection))
505  (typecase collection
506  (list (cons item collection))
507  (hash-table
508  (setf (gethash (item-lr1-hash-value item) collection) item)
509  collection)))
510 
511 (defun lr1-add-collection (items collection)
512  "Add all the elements of ITEMS to COLLECTION."
513  (declare (type lr1-collection items collection))
514  (typecase items
515  (list
516  (typecase collection
517  (list (nconc items collection))
518  (hash-table
519  (dolist (item items)
520  (setf (gethash (item-lr1-hash-value item) collection) item))
521  collection)))
522  (hash-table
523  (typecase collection
524  (list (error "This cannot happen"))
525  (hash-table
526  (maphash #'(lambda (k v) (setf (gethash k collection) v))
527  items)
528  collection)))))
529 
530 (defun items-closure (items grammar)
531  "Compute the closure of a set of LR(1) items."
532  (declare (list items) (type grammar grammar))
533  (let ((res '()) (n 0)
534  (threshold *items-closure-hash-threshold*))
535  (declare (optimize (speed 3) (space 0)))
536  (declare (type index n) (type (or list hash-table) res))
537  (labels ((add (item)
538  (declare (type lr1-item item))
539  (unless (lr1-find item res)
540  (setf res (lr1-add item res))
541  (when (listp res)
542  (incf n)
543  (when (> n threshold)
544  (setf res (make-hash-table-from-lr1-list res))))
545  (unless (item-dot-right-p item)
546  (let ((dot-symbol (item-dot-symbol item)))
547  (dolist (production (grammar-productions grammar))
548  (when (eq (production-symbol production) dot-symbol)
549  (dolist (terminal
550  (sequence-first-terminals
551  (append (item-dot-right item 1)
552  (list (item-lookahead item)))
553  grammar))
554  (add (make-lr1-item production 0 terminal))))))))))
555  (mapc #'add items)
556  res)))
557 
558 ;;; Goto transitions
559 
560 (defstruct (goto
561  (:constructor make-goto (symbol target)))
562  (symbol nil :type symbol)
563  (target (required-argument) :type kernel))
564 
565 (declaim (inline goto-equal-p find-goto))
566 
567 (defun goto-equal-p (g1 g2)
568  (declare (type goto g1 g2))
569  (and (eq (goto-symbol g1) (goto-symbol g2))
570  ;; kernels are interned -- see make-kernel.
571  (eq (goto-target g1) (goto-target g2))))
572 
573 (defun find-goto (kernel symbol)
574  (declare (type kernel kernel) (symbol symbol))
575  (find symbol (kernel-gotos kernel) :key #'goto-symbol))
576 
577 (defun compute-goto (kernel symbol grammar)
578  "Compute the kernel of goto(KERNEL, SYMBOL)"
579  (declare (type kernel kernel) (symbol symbol) (type grammar grammar))
580  (let ((result '()))
581  (dolist (item (kernel-items kernel))
582  (when (not (item-dot-right-p item))
583  (let ((c (item-dot-symbol item)))
584  (when (eq c symbol)
585  (pushnew (item-shift item) result :test #'item-equal-p))
586  (dolist (production (grammar-productions grammar))
587  (when (and (not (null (production-derives production)))
588  (eq symbol (car (production-derives production)))
589  (member (production-symbol production)
590  (derives-first c grammar)))
591  (pushnew (make-item production 1) result
592  :test #'item-equal-p))))))
593  result))
594 
595 (defun compute-kernels (grammar)
596  "Compute the set collections of LR(0) items for GRAMMAR."
597  (declare (type grammar grammar))
598  (let ((p0 (car (grammar-productions grammar))))
599  (assert (= 1 (length (production-derives p0))))
600  (let ((kernels '()))
601  (declare (optimize (speed 3) (space 0)))
602  (labels
603  ((add-goto (kernel symbol)
604  (let* ((new-kernel*
605  (compute-goto kernel symbol grammar))
606  (new-kernel
607  (and new-kernel*
608  (make-kernel new-kernel* kernels)))
609  (new-goto (and new-kernel
610  (make-goto symbol new-kernel))))
611  (when new-kernel
612  (unless (memq new-kernel kernels)
613  (add-kernel new-kernel))
614  (unless (member new-goto (kernel-gotos kernel)
615  :test #'goto-equal-p)
616  (push new-goto (kernel-gotos kernel))))))
617  (add-kernel (kernel)
618  (push kernel kernels)
619  (dolist (item (kernel-items kernel))
620  (unless (item-dot-right-p item)
621  (add-goto kernel (item-dot-symbol item))))
622  (dolist (production (grammar-productions grammar))
623  (unless (null (production-derives production))
624  (add-goto kernel (car (production-derives production)))))))
625  (add-kernel (make-kernel (list (make-item p0 0))))
626  (nreverse kernels)))))
627 
628 ;;; Lookaheads
629 
630 (defun compute-lookaheads (kernel grammar &optional propagate-only)
631  "Compute the LR(1) lookaheads for all items in KERNEL.
632 If PROPAGATE-ONLY is true, ignore spontaneous generation."
633  (declare (type kernel kernel) (type grammar grammar))
634  (let ((res '()))
635  (declare (optimize (speed 3) (space 0)))
636  (declare (list res))
637  (dolist (i (kernel-items kernel))
638  (let ((j (items-closure
639  (list (make-lr1-item (item-production i) (item-position i)
640  'propagate))
641  grammar)))
642  (do-lr1-collection (item j)
643  (unless (or (and propagate-only
644  (not (eq 'propagate (item-lookahead item))))
645  (item-dot-right-p item))
646  (push (cons i (lr1-item-shift item)) res)))))
647  res))
648 
649 (defun compute-all-lookaheads (kernels grammar)
650  "Compute the LR(1) lookaheads for all the collections in KERNELS."
651  (declare (list kernels) (type grammar grammar))
652  (setf (item-lookaheads (kernel-item (car kernels))) (list 'yacc-eof-symbol))
653  (let ((previously-changed kernels) (changed '())
654  (propagate-only nil))
655  (declare (optimize (speed 3) (space 0)))
656  (loop
657  (dolist (kernel kernels)
658  (when (memq kernel previously-changed)
659  (let ((lookaheads (compute-lookaheads kernel grammar propagate-only)))
660  (declare (list lookaheads))
661  (dolist (goto (kernel-gotos kernel))
662  (declare (type goto goto))
663  (let ((target (goto-target goto)) (new nil))
664  (flet ((new-lookahead (item lookahead)
665  (declare (type lr1-item item) (symbol lookahead))
666  (let ((i (find item (kernel-items target)
667  :test #'item-equal-p)))
668  (when i
669  (unless (memq lookahead (item-lookaheads i))
670  (push lookahead (item-lookaheads i))
671  (setq new t))))))
672  (dolist (e lookaheads)
673  (let ((i (car e)) (ni (cdr e)))
674  (declare (type lr0-item i) (type lr1-item ni))
675  (cond
676  ((eq 'propagate (item-lookahead ni))
677  ;; propagate
678  (let ((item (find i (kernel-items kernel)
679  :test #'item-equal-p)))
680  (when item
681  (dolist (s (item-lookaheads item))
682  (new-lookahead ni s)))))
683  (t
684  ;; spontaneous generation
685  (new-lookahead ni (item-lookahead ni)))))))
686  (when new
687  (pushnew target changed)))))))
688  (unless changed (return))
689  (psetq previously-changed changed changed '()
690  propagate-only t)))
691  kernels)
692 
693 (defun print-states (kernels lookaheads &optional (stream *standard-output*))
694  (declare (list kernels))
695  (let ((stream (etypecase stream
696  ((member nil) *standard-output*)
697  ((member t) *terminal-io*)
698  (stream stream))))
699  (declare (stream stream))
700  (pprint-logical-block (stream kernels)
701  (loop
702  (pprint-exit-if-list-exhausted)
703  (let ((k (pprint-pop)))
704  (format stream "~S: " (kernel-id k))
705  (pprint-logical-block (stream (kernel-items k))
706  (loop
707  (pprint-exit-if-list-exhausted)
708  (let* ((item (pprint-pop)))
709  (if lookaheads
710  (format stream "~S ~_~S~:@_" item (item-lookaheads item))
711  (format stream "~S~:@_" item)))))
712  (format stream "~_"))))))
713 
714 ;;; Parser generation
715 
716 (defun number-kernels (kernels)
717  "Set a unique ID for all kernels in KERNELS."
718  (declare (list kernels))
719  (let ((id 0))
720  (dolist (k kernels)
721  (setf (kernel-id k) id)
722  (incf id))))
723 
724 (defun print-goto-graph (kernels &optional (stream *standard-output*))
725  "Print the goto graph defined by KERNELS."
726  (declare (list kernels))
727  (let ((stream (etypecase stream
728  ((member nil) *standard-output*)
729  ((member t) *terminal-io*)
730  (stream stream))))
731  (declare (stream stream))
732  (pprint-logical-block (stream kernels)
733  (loop
734  (pprint-exit-if-list-exhausted)
735  (let ((k (pprint-pop)))
736  (format stream "~S: " (kernel-id k))
737  (pprint-logical-block (stream (kernel-gotos k))
738  (loop
739  (pprint-exit-if-list-exhausted)
740  (let ((g (pprint-pop)))
741  (format stream "~S -> ~S ~@:_"
742  (goto-symbol g) (kernel-id (goto-target g))))))
743  (format stream "~@:_"))))))
744 
745 (defstruct (action (:constructor nil)
746  (:print-function print-action))
747  )
748 
749 (defstruct (accept-action (:include action))
750  )
751 
752 (defstruct (reduce-action (:include action)
753  (:constructor make-reduce-action
754  (symbol length
755  &key action action-form)))
756  (symbol (required-argument) :type symbol)
757  (length (required-argument) :type index)
758  (action #'list :type function)
759  (action-form nil))
760 
761 (defstruct (shift-action (:include action)
762  (:constructor
763  make-shift-action (state)))
764  (state (required-argument) :type index))
765 
766 (defun action-equal-p (a1 a2)
767  (declare (type (or null action) a1 a2))
768  (or (eq a1 a2)
769  (and
770  (eq (type-of a1) (type-of a2))
771  (typecase a1
772  (reduce-action
773  (and (eq (reduce-action-symbol a1) (reduce-action-symbol a2))
774  (= (reduce-action-length a1) (reduce-action-length a2))
775  (eq (reduce-action-action a1) (reduce-action-action a2))))
776  (shift-action
777  (= (shift-action-state a1) (shift-action-state a2)))
778  (t t)))))
779 
780 (defun print-action (a s d)
781  (declare (type action a) (stream s) (ignore d))
782  (print-unreadable-object (a s :type t)
783  (typecase a
784  (reduce-action
785  (format s "~S (~D)" (reduce-action-symbol a) (reduce-action-length a)))
786  (shift-action
787  (format s "~D" (shift-action-state a))))))
788 
789 (define-condition yacc-compile-warning (warning)
790  ())
791 
792 (define-condition conflict-warning (yacc-compile-warning simple-warning)
793  ((kind :initarg :kind :reader conflict-warning-kind)
794  (state :initarg :state :reader conflict-warning-state)
795  (terminal :initarg :terminal :reader conflict-warning-terminal))
796  (:report (lambda (w stream)
797  (format stream "~A conflict on terminal ~S in state ~A, ~_~?"
798  (case (conflict-warning-kind w)
799  (:shift-reduce "Shift/Reduce")
800  (:reduce-reduce "Reduce/Reduce")
801  (t (conflict-warning-kind w)))
802  (conflict-warning-terminal w)
803  (conflict-warning-state w)
804  (simple-condition-format-control w)
805  (simple-condition-format-arguments w)))))
806 
807 (define-condition conflict-summary-warning (yacc-compile-warning)
808  ((shift-reduce :initarg :shift-reduce
809  :reader conflict-summary-warning-shift-reduce)
810  (reduce-reduce :initarg :reduce-reduce
811  :reader conflict-summary-warning-reduce-reduce))
812  (:report (lambda (w stream)
813  (format stream "~D Shift/Reduce, ~D Reduce/Reduce conflicts"
814  (conflict-summary-warning-shift-reduce w)
815  (conflict-summary-warning-reduce-reduce w)))))
816 
817 (defstruct (parser (:constructor %make-parser (states goto action)))
818  (states (required-argument) :type index)
819  (goto (required-argument) :type simple-vector)
820  (action (required-argument) :type simple-vector))
821 
822 (defun find-precedence (op precedence)
823  "Return the tail of PRECEDENCE starting with the element containing OP.
824 PRECEDENCE is a list of elements of the form (KEYWORD . (op...))."
825  (declare (symbol op))
826  (cond
827  ((null precedence) '())
828  ((member op (cdar precedence)) precedence)
829  (t (find-precedence op (cdr precedence)))))
830 
831 (defun find-single-terminal (s grammar)
832  "Return the only terminal in S, or NIL if none or multiple."
833  (declare (list s) (type grammar grammar))
834  (cond
835  ((null s) nil)
836  ((terminal-p (car s) grammar)
837  (and (not (member-if #'(lambda (s) (terminal-p s grammar)) (cdr s)))
838  (car s)))
839  (t (find-single-terminal (cdr s) grammar))))
840 
841 (defun handle-conflict (a1 a2 grammar action-productions id s
842  &optional muffle-conflicts)
843  "Decide what to do with a conflict between A1 and A2 in state ID on symbol S.
844 Returns three actions: the chosen action, the number of new sr and rr."
845  (declare (type action a1 a2) (type grammar grammar)
846  (type index id) (symbol s))
847  (when (action-equal-p a1 a2)
848  (return-from handle-conflict (values a1 0 0)))
849  (when (and (shift-action-p a2) (reduce-action-p a1))
850  (psetq a1 a2 a2 a1))
851  (let ((p1 (cdr (assoc a1 action-productions)))
852  (p2 (cdr (assoc a2 action-productions))))
853  ;; operator precedence and associativity
854  (when (and (shift-action-p a1) (reduce-action-p a2))
855  (let* ((op1 (find-single-terminal (production-derives p1) grammar))
856  (op2 (find-single-terminal (production-derives p2) grammar))
857  (op1-tail (find-precedence op1 (grammar-precedence grammar)))
858  (op2-tail (find-precedence op2 (grammar-precedence grammar))))
859  (when (and (eq s op1) op1-tail op2-tail)
860  (cond
861  ((eq op1-tail op2-tail)
862  (return-from handle-conflict
863  (ecase (caar op1-tail)
864  ((:left) (values a2 0 0))
865  ((:right) (values a1 0 0))
866  ((:nonassoc) (values nil 0 0)))))
867  (t
868  (return-from handle-conflict
869  (if (tailp op2-tail (cdr op1-tail))
870  (values a1 0 0)
871  (values a2 0 0))))))))
872  ;; default: prefer shift or first production
873  (unless muffle-conflicts
874  (warn (make-condition
875  'conflict-warning
876  :kind (typecase a1
877  (shift-action :shift-reduce)
878  (t :reduce-reduce))
879  :state id :terminal s
880  :format-control "~S and ~S~@[ ~_~A~]~@[ ~_~A~]"
881  :format-arguments (list a1 a2 p1 p2))))
882  (typecase a1
883  (shift-action (values a1 1 0))
884  (t (values a1 0 1)))))
885 
886 (defun compute-parsing-tables (kernels grammar
887  &key muffle-conflicts)
888  "Compute the parsing tables for grammar GRAMMAR and transitions KERNELS.
889 PRECEDENCE is as in FIND-PRECEDENCE. MUFFLE-WARNINGS is one of NIL, T, :SOME
890 or a list of the form (sr rr)."
891  (declare (list kernels) (type grammar grammar))
892  (let ((numkernels (length kernels)))
893  (let ((goto (make-array numkernels :initial-element '()))
894  (action (make-array numkernels :initial-element '()))
895  (sr-conflicts 0) (rr-conflicts 0)
896  (epsilon-productions (grammar-epsilon-productions grammar))
897  (action-productions '()))
898  (declare (fixnum sr-conflicts rr-conflicts))
899  (flet ((set-action (k symbols a production)
900  (push (cons a production) action-productions)
901  (let ((id (kernel-id k)))
902  (dolist (s symbols)
903  (declare (symbol s))
904  (let ((s-a (assoc s (aref action id))))
905  (cond
906  ((cdr s-a)
907  (multiple-value-bind (new-action s-r r-r)
908  (handle-conflict
909  (cdr s-a) a grammar action-productions
910  id s muffle-conflicts)
911  (setf (cdr s-a) new-action)
912  (incf sr-conflicts s-r) (incf rr-conflicts r-r)))
913  (s-a
914  (setf (cdr s-a) a))
915  (t (push (cons s a) (aref action id))))))))
916  (set-goto (k symbols target)
917  (let ((i (kernel-id k)) (j (kernel-id target)))
918  (dolist (s symbols)
919  (declare (symbol s))
920  (let ((e (assoc s (aref goto i))))
921  (when e
922  (assert (eq j (cdr e)))
923  (return-from set-goto)))
924  (push (cons s j) (aref goto i))))))
925  (do* ((ks kernels (cdr ks)) (k (car ks) (car ks)))
926  ((null ks))
927  (dolist (item (kernel-items k))
928  (cond
929  ((item-dot-right-p item)
930  ;; non-epsilon reduction
931  (let ((la (item-lookaheads item)))
932  (cond
933  ((and (eq 's-prime (item-symbol item))
934  (= 1 (item-position item)))
935  (when (member 'yacc-eof-symbol la)
936  (set-action k (list 'yacc-eof-symbol)
937  (make-accept-action)
938  (item-production item))))
939  (t
940  (set-action k la
941  (make-reduce-action
942  (item-symbol item)
943  (length (item-derives item))
944  :action (item-action item)
945  :action-form (item-action-form item))
946  (item-production item))))))
947  (t
948  (let ((c (item-dot-symbol item)))
949  ;; shift
950  (let ((a (derives-first-terminal c grammar)))
951  (dolist (s a)
952  (let ((g (find-goto k s)))
953  (when g
954  (set-action k (list s)
955  (make-shift-action
956  (kernel-id (goto-target g)))
957  (item-production item))))))
958  ;; epsilon reduction
959  (dolist (a-epsilon epsilon-productions)
960  (let ((a (production-symbol a-epsilon)))
961  (when (member a (derives-first c grammar))
962  (let* ((first-eta
963  (relative-first c a grammar))
964  (first-eta-delta
965  (combine-first first-eta
966  (item-dot-right item 1) grammar))
967  (first-eta-delta-b
968  (if (member 'epsilon first-eta-delta)
969  (union (remove 'epsilon first-eta-delta)
970  (item-lookaheads item))
971  first-eta-delta)))
972  (set-action
973  k first-eta-delta-b
974  (make-reduce-action
975  a 0
976  :action (production-action a-epsilon)
977  :action-form (production-action-form a-epsilon))
978  a-epsilon)
979  ))))
980  ))))
981  (dolist (g (kernel-gotos k))
982  (when (not (terminal-p (goto-symbol g) grammar))
983  (set-goto k (list (goto-symbol g)) (goto-target g))))))
984  (when (null muffle-conflicts) (setq muffle-conflicts '(0 0)))
985  (unless (or (eq t muffle-conflicts)
986  (and (consp muffle-conflicts)
987  (= (car muffle-conflicts) sr-conflicts)
988  (= (cadr muffle-conflicts) rr-conflicts)))
989  (warn (make-condition 'conflict-summary-warning
990  :shift-reduce sr-conflicts
991  :reduce-reduce rr-conflicts)))
992  (%make-parser numkernels goto action))))
993 
994 (defun make-parser (grammar
995  &key (discard-memos t) (muffle-conflicts nil)
996  (print-derives-epsilon nil) (print-first-terminals nil)
997  (print-states nil)
998  (print-goto-graph nil) (print-lookaheads nil))
999  "Returns a parser for the given grammar.
1000 If MUFFLE-CONFLICTS is NIL, then a warning will be signaled for all conflicts.
1001 If it is T, then no warnings will be signaled. If it is a list of the form
1002 (SR SS), then a warning will be signaled unless there are exactly SR
1003 shift-reduce conflicts and SS shift-shift conflicts."
1004  (declare (type grammar grammar))
1005  (let ((kernels (compute-kernels grammar)))
1006  (compute-all-lookaheads kernels grammar)
1007  (number-kernels kernels)
1008  (when print-derives-epsilon (print-derives-epsilon grammar))
1009  (when print-first-terminals (print-first-terminals grammar))
1010  (when print-goto-graph (print-goto-graph kernels))
1011  (when (or print-states print-lookaheads)
1012  (print-states kernels print-lookaheads))
1013  (prog1
1014  (compute-parsing-tables kernels grammar
1015  :muffle-conflicts muffle-conflicts)
1016  (when discard-memos (grammar-discard-memos grammar)))))
1017 
1018 (define-condition yacc-runtime-error (error)
1019  ()
1020 )
1021 
1022 (define-condition yacc-parse-error (yacc-runtime-error)
1023  ((terminal :initarg :terminal :reader yacc-parse-error-terminal)
1024  (value :initarg :value :reader yacc-parse-error-value)
1025  (expected-terminals :initarg :expected-terminals
1026  :reader yacc-parse-error-expected-terminals))
1027  (:report (lambda (e stream)
1028  (format stream "Unexpected terminal ~S (value ~S). ~@:_~
1029  Expected one of: ~S"
1030  (yacc-parse-error-terminal e)
1031  (yacc-parse-error-value e)
1032  (yacc-parse-error-expected-terminals e)))))
1033 
1034 (defun parse-with-lexer (lexer parser)
1035 "Parse the stream of symbols provided by LEXER using PARSER.
1036 LEXER is a function of no arguments returning a symbol and a semantic value,
1037 and should return (VALUES NIL NIL) when the end of input is reached.
1038 Handle YACC-PARSE-ERROR to provide custom error reporting."
1039  (declare (type (function () (values symbol t)) lexer))
1040  (declare (type parser parser))
1041  (let ((action-array (parser-action parser))
1042  (goto-array (parser-goto parser)))
1043  (flet ((action (i a)
1044  (declare (type index i) (symbol a))
1045  (cdr (assoc a (aref action-array i))))
1046  (goto (i a)
1047  (declare (type index i) (symbol a))
1048  (or (cdr (assoc a (aref goto-array i)))
1049  (error "This cannot happen."))))
1050  (let ((stack (list 0)) symbol value)
1051  (flet ((next-symbol ()
1052  (multiple-value-bind (s v) (funcall lexer)
1053  (setq symbol (or s 'yacc-eof-symbol) value v))))
1054  (next-symbol)
1055  (loop
1056  (let* ((state (car stack))
1057  (action (action state symbol)))
1058  (etypecase action
1059  (shift-action
1060  (push value stack)
1061  (push (shift-action-state action) stack)
1062  (next-symbol))
1063  (reduce-action
1064  (let ((vals '()))
1065  (dotimes (n (reduce-action-length action))
1066  (pop stack)
1067  (push (pop stack) vals))
1068  (let ((s* (car stack)))
1069  (push (apply (reduce-action-action action) vals) stack)
1070  (push (goto s* (reduce-action-symbol action)) stack))))
1071  (accept-action
1072  (pop stack)
1073  (return (pop stack)))
1074  (null
1075  (error (make-condition
1076  'yacc-parse-error
1077  :terminal (if (eq symbol 'yacc-eof-symbol) nil symbol)
1078  :value value
1079  :expected-terminals
1080  (mapcan
1081  #'(lambda (e)
1082  (and (cdr e)
1083  (list
1084  (if (eq (car e) 'yacc-eof-symbol)
1085  nil
1086  (car e)))))
1087  (aref action-array state)))))))))))))
1088 
1089 ;;; User interface
1090 
1091 (defun parse-production (form)
1092  (let ((symbol (car form))
1093  (productions '()))
1094  (dolist (stuff (cdr form))
1095  (cond
1096  ((and (symbolp stuff) (not (null stuff)))
1097  (push (make-production symbol (list stuff)
1098  :action #'identity :action-form '#'identity)
1099  productions))
1100  ((listp stuff)
1101  (let ((l (car (last stuff))))
1102  (let ((rhs (if (symbolp l) stuff (butlast stuff)))
1103  (action (if (symbolp l) '#'list l)))
1104  (push (make-production symbol rhs
1105  :action (eval action)
1106  :action-form action)
1107  productions))))
1108  (t (error "Unexpected production ~S" stuff))))
1109  productions))
1110 
1111 (defun parse-grammar (forms)
1112  (let ((options '()) (make-options '()) (productions '()))
1113  (dolist (form forms)
1114  (cond
1115  ((member (car form)
1116  '(:muffle-conflicts
1117  :print-derives-epsilon :print-first-terminals
1118  :print-states :print-goto-graph :print-lookaheads))
1119  (unless (null (cddr form))
1120  (error "Malformed option ~S" form))
1121  (push (car form) make-options)
1122  (push (cadr form) make-options))
1123  ((keywordp (car form))
1124  (unless (null (cddr form))
1125  (error "Malformed option ~S" form))
1126  (push (car form) options)
1127  (push (cadr form) options))
1128  ((symbolp (car form))
1129  (setq productions (nconc (parse-production form) productions)))
1130  (t
1131  (error "Unexpected grammar production ~S" form))))
1132  (values (nreverse options) (nreverse make-options)
1133  (nreverse productions))))
1134 
1135 (defmacro define-grammar (name &body body)
1136  "DEFINE-GRAMMAR NAME OPTION... PRODUCTION...
1137 PRODUCTION ::= (SYMBOL RHS...)
1138 RHS ::= SYMBOL | (SYMBOL... [ACTION])
1139 Defines the special variable NAME to be a grammar. Options are as in
1140 MAKE-GRAMMAR."
1141  (multiple-value-bind (options make-options productions) (parse-grammar body)
1142  (unless (null make-options)
1143  (warn "DEFINE-GRAMMAR ignores options ~S" make-options))
1144  `(defparameter ,name
1145  ',(apply #'make-grammar
1146  :name name
1147  :productions productions
1148  options))))
1149 
1150 (defmacro define-parser (name &body body)
1151  "DEFINE-GRAMMAR NAME OPTION... PRODUCTION...
1152 PRODUCTION ::= (SYMBOL RHS...)
1153 RHS ::= SYMBOL | (SYMBOL... [ACTION])
1154 Defines the special variable NAME to be a parser. Options are as in
1155 MAKE-GRAMMAR and MAKE-PARSER."
1156  (multiple-value-bind (options make-options productions) (parse-grammar body)
1157  `(defparameter ,name
1158  ',(apply #'make-parser
1159  (apply #'make-grammar
1160  :name name
1161  :productions productions
1162  options)
1163  make-options))))
1164 
1165 ;;; Support for fasdumping grammars and parsers.
1166 
1167 (defmethod make-load-form ((p production) &optional env)
1168  (declare (ignore env))
1169  (when (null (production-action-form p))
1170  (error "Production ~S cannot be dumped (it has no action form)" p))
1171  (values
1172  `(make-production ',(production-symbol p) ',(production-derives p))
1173  `(setf (production-action-form ,p) ',(production-action-form p)
1174  (production-action ,p) (eval ',(production-action-form p)))))
1175 
1176 (defmethod make-load-form ((g grammar) &optional env)
1177  (make-load-form-saving-slots g :environment env))
1178 
1179 (defmethod make-load-form ((p parser) &optional env)
1180  (make-load-form-saving-slots p :environment env))
1181 
1182 (defmethod make-load-form ((a accept-action) &optional env)
1183  (declare (ignore env))
1184  `(make-accept-action))
1185 
1186 (defmethod make-load-form ((a reduce-action) &optional env)
1187  (declare (ignore env))
1188  (when (null (reduce-action-action-form a))
1189  (error "Action ~S cannot be dumped (it has no action form)" a))
1190  (values
1191  `(make-reduce-action ',(reduce-action-symbol a) ',(reduce-action-length a))
1192  `(setf (reduce-action-action-form ,a) ',(reduce-action-action-form a)
1193  (reduce-action-action ,a) (eval ',(reduce-action-action-form a)))))
1194 
1195 (defmethod make-load-form ((a shift-action) &optional env)
1196  (declare (ignore env))
1197  `(make-shift-action ',(shift-action-state a)))