changelog shortlog graph tags branches changeset files file revisions raw help

Mercurial > core / annotate lisp/lib/dat/html/html.lisp

changeset 667: bb8aa1eda12b
parent: a3b65a8138ac
author: Richard Westhaver <ellis@rwest.io>
date: Mon, 23 Sep 2024 17:03:54 -0400
permissions: -rw-r--r--
description: graph, css vars, corfu-terminal fix
240
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1
 ;;; dat/html.lisp --- HTML parser
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3
 ;; see https://github.com/rotatef/cl-html5-parser
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5
 ;; spec: https://html.spec.whatwg.org/
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
6
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
7
 ;;; Commentary:
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
8
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
9
 ;; HTML is usually associated with XML, but not all HTML is valid
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
10
 ;; XML.
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
11
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
12
 ;; This package provides a pretty good HTML parser, with the default
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
13
 ;; DOM being the one used in our XML package DAT/XML.
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
14
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
15
 ;;; Code:
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
16
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
17
 ;;; inputstream
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
18
 (in-package :dat/html)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
19
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
20
 (deftype array-length ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
21
   "Type of an array index."
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
22
   '(integer 0 #.array-dimension-limit))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
23
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
24
 (deftype chunk ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
25
   "Type of the input stream buffer."
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
26
   '(vector character *))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
27
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
28
 (defparameter *default-encoding* :utf-8)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
29
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
30
 (defclass html-input-stream ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
31
   ((source :initarg :source)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
32
    (encoding :reader html5-stream-encoding)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
33
    (char-stream :initform nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
34
    (chunk)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
35
    (chunk-offset)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
36
    (pending-cr)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
37
    (errors :initform nil :accessor html5-stream-errors)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
38
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
39
 (defun make-html-input-stream (source &key override-encoding fallback-encoding)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
40
   (when (stringp source)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
41
     ;; Encoding is not relevant when input is a string,
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
42
     ;; but we set it utf-8 here to avoid auto detecting taking place.
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
43
     (setf override-encoding :utf-8))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
44
   (let ((self (make-instance 'html-input-stream :source source)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
45
     (with-slots (encoding stream) self
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
46
       (setf encoding (detect-encoding self
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
47
                                       (find-encoding override-encoding)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
48
                                       (find-encoding fallback-encoding)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
49
       (open-char-stream self))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
50
     self))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
51
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
52
 ;; 12.2.2.2 Character encodings
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
53
 (defun find-encoding (encoding-name)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
54
   ;; Normalize the string designator
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
55
   (setf encoding-name (string-upcase (substitute #\- #\_ (string-trim +space-characters+ (string encoding-name)))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
56
   ;; All known encoding will already be interned in the keyword package so find-symbol is fine here
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
57
   (setf encoding-name (find-symbol encoding-name :keyword))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
58
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
59
   (handler-case
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
60
       ;; Verfiy that flexi-streams knows the encoding and resolve aliases
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
61
       (case (flex:external-format-name (flex:make-external-format encoding-name))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
62
         ;; Some encoding should be replaced by some other.
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
63
         ;; Only those supported by flexi-streams are listed here.
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
64
         ;; iso-8859-11 should be replaced by windows-874, but flexi-streams doesn't that encoding.
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
65
         (:iso-8859-1 :windows-1252)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
66
         (:iso-8859-9 :windows-1254)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
67
         (:us-ascii :windows-1252)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
68
         (otherwise encoding-name))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
69
     (flex:external-format-error ())))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
70
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
71
 ;; 12.2.2.1 Determining the character encoding
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
72
 (defun detect-encoding (stream override-encoding fallback-encoding)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
73
   (with-slots (encoding) stream
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
74
     (block nil
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
75
       ;; 1. and 2. encoding overridden by user or transport layer
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
76
       (when override-encoding
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
77
         (return (cons override-encoding :certain)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
78
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
79
       ;; 3. wait for 1024 bytes, not implemented
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
80
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
81
       ;; 4. Detect BOM
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
82
       (let ((bom-encoding (detect-bom stream)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
83
         (when bom-encoding
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
84
           (return (cons bom-encoding :certain))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
85
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
86
       ;; 5. Prescan not implemented
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
87
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
88
       ;; 6. Use fallback encoding
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
89
       (when fallback-encoding
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
90
         (return (cons encoding :tentative)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
91
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
92
       ;; 7. Autodect not implemented
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
93
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
94
       ;; 8. Implementation-defined default
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
95
       (return (cons *default-encoding* :tentative)))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
96
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
97
 (defmacro handle-encoding-errors (stream &body body)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
98
   `(handler-bind ((flex:external-format-encoding-error
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
99
                    (lambda (x)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
100
                      (declare (ignore x))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
101
                      (push :invalid-codepoint (html5-stream-errors ,stream))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
102
                      (use-value #\uFFFD))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
103
      ,@body))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
104
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
105
 (defun open-char-stream (self)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
106
   (with-slots (source encoding char-stream chunk chunk-offset pending-cr) self
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
107
     (setf chunk (make-array (* 10 1024) :element-type 'character :fill-pointer 0))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
108
     (setf chunk-offset 0)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
109
     (setf pending-cr nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
110
     (when char-stream
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
111
       (close char-stream))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
112
     (setf char-stream
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
113
           (if (stringp source)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
114
               (make-string-input-stream source)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
115
               (flex:make-flexi-stream
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
116
                (etypecase source
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
117
                  (pathname
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
118
                   (open source :element-type '(unsigned-byte 8)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
119
                  (stream
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
120
                   source)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
121
                  (vector
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
122
                   (flex:make-in-memory-input-stream source)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
123
                :external-format (flex:make-external-format (car encoding) :eol-style :lf))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
124
     ;; 12.2.2.4 says we should always skip the first byte order mark
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
125
     (handle-encoding-errors self
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
126
       (let ((first-char (peek-char nil char-stream nil)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
127
         (when (eql first-char #\ufeff)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
128
           (read-char char-stream))))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
129
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
130
 (defun detect-bom (self)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
131
   (with-slots (source) self
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
132
     (let (byte-0 byte-1 byte-2)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
133
       (etypecase source
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
134
         (vector
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
135
          (when (> (length source) 0) (setf byte-0 (aref source 0)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
136
          (when (> (length source) 1) (setf byte-1 (aref source 1)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
137
          (when (> (length source) 2) (setf byte-2 (aref source 2))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
138
         (pathname
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
139
          (with-open-file (in source :element-type '(unsigned-byte 8))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
140
            (setf byte-0 (read-byte in nil))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
141
            (setf byte-1 (read-byte in nil))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
142
            (setf byte-2 (read-byte in nil))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
143
         (stream
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
144
          (error "Can't detect encoding when source is a stream.")))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
145
       (cond ((and (eql byte-0 #xfe)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
146
                   (eql byte-1 #xff))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
147
              :utf-16be)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
148
             ((and (eql byte-0 #xff)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
149
                   (eql byte-1 #xfe))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
150
              :utf-16le)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
151
             ((and (eql byte-0 #xef)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
152
                   (eql byte-1 #xbb)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
153
                   (eql byte-2 #xbf))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
154
              :utf-8)))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
155
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
156
 ;; 12.2.2.3 Changing the encoding while parsing
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
157
 (defun html5-stream-change-encoding (stream new-encoding)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
158
   (setf new-encoding (find-encoding new-encoding))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
159
   (with-slots (encoding char-stream) stream
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
160
     ;; 1.
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
161
     (when (member (car encoding) '(:utf-16le :utf-16be))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
162
       (setf encoding (cons (car encoding) :certain))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
163
       (return-from html5-stream-change-encoding))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
164
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
165
     ;; 2.
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
166
     (when (member new-encoding '(:utf-16le :utf-16be))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
167
       (setf new-encoding :utf-8))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
168
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
169
     ;; 3.
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
170
     (when (eql (car encoding) new-encoding)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
171
       (setf encoding (cons (car encoding) :certain))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
172
       (return-from html5-stream-change-encoding))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
173
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
174
     ;; 4. Not impleneted
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
175
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
176
     ;; 5. Restart paring from scratch
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
177
     (setf encoding (cons new-encoding :certain))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
178
     (open-char-stream stream)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
179
     (throw 'please-reparse t)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
180
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
181
 (defun html5-stream-char (stream)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
182
   (with-slots (chunk chunk-offset) stream
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
183
     (when (>= chunk-offset (length chunk))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
184
       (unless (read-chunk stream)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
185
         (return-from html5-stream-char +eof+)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
186
     (prog1 (char chunk chunk-offset)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
187
       (incf chunk-offset))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
188
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
189
 (defun our-scan (chars opposite-p chunk &key start)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
190
   (loop for i from start below (length chunk)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
191
         for char = (char chunk i)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
192
         while (if opposite-p
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
193
                   (position char chars)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
194
                   (not (position char chars)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
195
         finally (return i)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
196
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
197
 (defun html5-stream-chars-until (stream characters &optional opposite-p)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
198
   "Returns a string of characters from the stream up to but not
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
199
    including any character in characters or end of file.
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
200
    "
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
201
   (with-slots (chunk chunk-offset) stream
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
202
     (declare (array-length chunk-offset) (chunk chunk))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
203
     (with-output-to-string (data)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
204
       (loop for end = (our-scan characters opposite-p chunk :start chunk-offset) do
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
205
             ;; If nothing matched, and it wasn't because we ran out of chunk,
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
206
             ;; then stop
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
207
             (when (and (not end)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
208
                        (/= chunk-offset (length chunk)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
209
               (return))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
210
             ;; If not the whole chunk matched, return everything
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
211
             ;; up to the part that didn't match
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
212
             (when (and end
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
213
                        (/= chunk-offset (length chunk)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
214
               (write-string chunk data :start chunk-offset :end end)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
215
               (setf chunk-offset end)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
216
               (return))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
217
             ;; If the whole remainder of the chunk matched,
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
218
             ;; use it all and read the next chunk
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
219
             (write-string chunk data :start chunk-offset)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
220
             (unless (read-chunk stream)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
221
               (return))))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
222
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
223
 (defun html5-stream-unget (stream char)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
224
   (with-slots (chunk chunk-offset) stream
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
225
     (unless (eql char +eof+)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
226
       (cond ((zerop chunk-offset)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
227
              (cond ((< (fill-pointer chunk) (array-dimension chunk 0))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
228
                     (incf (fill-pointer chunk))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
229
                     (replace chunk chunk :start1 1))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
230
                    (t
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
231
                     (let ((new-chunk (make-array (1+ (array-dimension chunk 0))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
232
                                                  :element-type 'character
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
233
                                                  :fill-pointer (1+ (fill-pointer chunk)))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
234
                       (replace new-chunk chunk :start1 1)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
235
                       (setf chunk new-chunk))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
236
              (setf (char chunk 0) char))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
237
             (t
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
238
              (decf chunk-offset)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
239
              (assert (char= char (char chunk chunk-offset))))))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
240
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
241
 (defun read-chunk (stream)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
242
   (declare (optimize speed))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
243
   (with-slots (char-stream chunk chunk-offset pending-cr) stream
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
244
     (declare (array-length chunk-offset)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
245
              (chunk chunk))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
246
     (setf chunk-offset 0)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
247
     (let ((start 0))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
248
       (when pending-cr
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
249
         (setf (char chunk 0) #\Return)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
250
         (setf start 1)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
251
         (setf pending-cr nil))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
252
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
253
       (setf (fill-pointer chunk) (array-dimension chunk 0))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
254
       (handle-encoding-errors stream
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
255
         (setf (fill-pointer chunk) (read-sequence chunk char-stream :start start)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
256
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
257
       (unless (zerop (length chunk))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
258
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
259
         ;; check if last char is CR and EOF was not reached
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
260
         (when (and (= (length chunk) (array-dimension chunk 0))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
261
                    (eql (char chunk (1- (length chunk))) #\Return))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
262
           (setf pending-cr t)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
263
           (decf (fill-pointer chunk)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
264
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
265
         (report-character-errors stream chunk)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
266
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
267
         ;; Python code replaces surrugate pairs with U+FFFD here. Why?
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
268
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
269
         ;; Normalize line endings (CR LF)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
270
         (loop for previous = nil then current
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
271
            for current across chunk
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
272
            for index of-type array-length from 0
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
273
            with offset of-type array-length = 0
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
274
            do (unless (and (eql previous #\Return)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
275
                            (eql current #\Newline))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
276
                 (unless (= index offset)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
277
                   (setf (char chunk offset) current))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
278
                 (when (eql current #\Return)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
279
                   (setf (char chunk offset) #\Newline))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
280
                 (incf offset))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
281
            finally (setf (fill-pointer chunk) offset))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
282
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
283
         t))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
284
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
285
 (defun char-range (char1 char2)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
286
   (loop for i from (char-code char1) to (char-code char2)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
287
         collect (code-char i)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
288
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
289
 (defparameter *invalid-unicode*
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
290
   `(,@(char-range #\u0001 #\u0008)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
291
     #\u000B
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
292
     ,@(char-range #\u000E #\u001F)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
293
     ,@(char-range #\u007F #\u009F)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
294
     ;; The following are noncharacter as defined by Unicode.
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
295
     ;; Clozure Common Lisp doesn't like them.
667
bb8aa1eda12b graph, css vars, corfu-terminal fix
Richard Westhaver <ellis@rwest.io>
parents: 240
diff changeset
296
     ,@`(
bb8aa1eda12b graph, css vars, corfu-terminal fix
Richard Westhaver <ellis@rwest.io>
parents: 240
diff changeset
297
         ,@(char-range #\uD800 #\uDFFF)
bb8aa1eda12b graph, css vars, corfu-terminal fix
Richard Westhaver <ellis@rwest.io>
parents: 240
diff changeset
298
         ,@(char-range #\uFDD0 #\uFDEF)
bb8aa1eda12b graph, css vars, corfu-terminal fix
Richard Westhaver <ellis@rwest.io>
parents: 240
diff changeset
299
         #\uFFFE
bb8aa1eda12b graph, css vars, corfu-terminal fix
Richard Westhaver <ellis@rwest.io>
parents: 240
diff changeset
300
         #\uFFFF
bb8aa1eda12b graph, css vars, corfu-terminal fix
Richard Westhaver <ellis@rwest.io>
parents: 240
diff changeset
301
         #\u0001FFFE
bb8aa1eda12b graph, css vars, corfu-terminal fix
Richard Westhaver <ellis@rwest.io>
parents: 240
diff changeset
302
         #\u0001FFFF
bb8aa1eda12b graph, css vars, corfu-terminal fix
Richard Westhaver <ellis@rwest.io>
parents: 240
diff changeset
303
         #\u0002FFFE
bb8aa1eda12b graph, css vars, corfu-terminal fix
Richard Westhaver <ellis@rwest.io>
parents: 240
diff changeset
304
         #\u0002FFFF
bb8aa1eda12b graph, css vars, corfu-terminal fix
Richard Westhaver <ellis@rwest.io>
parents: 240
diff changeset
305
         #\u0003FFFE
bb8aa1eda12b graph, css vars, corfu-terminal fix
Richard Westhaver <ellis@rwest.io>
parents: 240
diff changeset
306
         #\u0003FFFF
bb8aa1eda12b graph, css vars, corfu-terminal fix
Richard Westhaver <ellis@rwest.io>
parents: 240
diff changeset
307
         #\u0004FFFE
bb8aa1eda12b graph, css vars, corfu-terminal fix
Richard Westhaver <ellis@rwest.io>
parents: 240
diff changeset
308
         #\u0004FFFF
bb8aa1eda12b graph, css vars, corfu-terminal fix
Richard Westhaver <ellis@rwest.io>
parents: 240
diff changeset
309
         #\u0005FFFE
bb8aa1eda12b graph, css vars, corfu-terminal fix
Richard Westhaver <ellis@rwest.io>
parents: 240
diff changeset
310
         #\u0005FFFF
bb8aa1eda12b graph, css vars, corfu-terminal fix
Richard Westhaver <ellis@rwest.io>
parents: 240
diff changeset
311
         #\u0006FFFE
bb8aa1eda12b graph, css vars, corfu-terminal fix
Richard Westhaver <ellis@rwest.io>
parents: 240
diff changeset
312
         #\u0006FFFF
bb8aa1eda12b graph, css vars, corfu-terminal fix
Richard Westhaver <ellis@rwest.io>
parents: 240
diff changeset
313
         #\u0007FFFE
bb8aa1eda12b graph, css vars, corfu-terminal fix
Richard Westhaver <ellis@rwest.io>
parents: 240
diff changeset
314
         #\u0007FFFF
bb8aa1eda12b graph, css vars, corfu-terminal fix
Richard Westhaver <ellis@rwest.io>
parents: 240
diff changeset
315
         #\u0008FFFE
bb8aa1eda12b graph, css vars, corfu-terminal fix
Richard Westhaver <ellis@rwest.io>
parents: 240
diff changeset
316
         #\u0008FFFF
bb8aa1eda12b graph, css vars, corfu-terminal fix
Richard Westhaver <ellis@rwest.io>
parents: 240
diff changeset
317
         #\u0009FFFE
bb8aa1eda12b graph, css vars, corfu-terminal fix
Richard Westhaver <ellis@rwest.io>
parents: 240
diff changeset
318
         #\u0009FFFF
bb8aa1eda12b graph, css vars, corfu-terminal fix
Richard Westhaver <ellis@rwest.io>
parents: 240
diff changeset
319
         #\u000AFFFE
bb8aa1eda12b graph, css vars, corfu-terminal fix
Richard Westhaver <ellis@rwest.io>
parents: 240
diff changeset
320
         #\u000AFFFF
bb8aa1eda12b graph, css vars, corfu-terminal fix
Richard Westhaver <ellis@rwest.io>
parents: 240
diff changeset
321
         #\u000BFFFE
bb8aa1eda12b graph, css vars, corfu-terminal fix
Richard Westhaver <ellis@rwest.io>
parents: 240
diff changeset
322
         #\u000BFFFF
bb8aa1eda12b graph, css vars, corfu-terminal fix
Richard Westhaver <ellis@rwest.io>
parents: 240
diff changeset
323
         #\u000CFFFE
bb8aa1eda12b graph, css vars, corfu-terminal fix
Richard Westhaver <ellis@rwest.io>
parents: 240
diff changeset
324
         #\u000CFFFF
bb8aa1eda12b graph, css vars, corfu-terminal fix
Richard Westhaver <ellis@rwest.io>
parents: 240
diff changeset
325
         #\u000DFFFE
bb8aa1eda12b graph, css vars, corfu-terminal fix
Richard Westhaver <ellis@rwest.io>
parents: 240
diff changeset
326
         #\u000DFFFF
bb8aa1eda12b graph, css vars, corfu-terminal fix
Richard Westhaver <ellis@rwest.io>
parents: 240
diff changeset
327
         #\u000EFFFE
bb8aa1eda12b graph, css vars, corfu-terminal fix
Richard Westhaver <ellis@rwest.io>
parents: 240
diff changeset
328
         #\u000EFFFF
bb8aa1eda12b graph, css vars, corfu-terminal fix
Richard Westhaver <ellis@rwest.io>
parents: 240
diff changeset
329
         #\u000FFFFE
bb8aa1eda12b graph, css vars, corfu-terminal fix
Richard Westhaver <ellis@rwest.io>
parents: 240
diff changeset
330
         #\u000FFFFF
bb8aa1eda12b graph, css vars, corfu-terminal fix
Richard Westhaver <ellis@rwest.io>
parents: 240
diff changeset
331
         #\u0010FFFE
bb8aa1eda12b graph, css vars, corfu-terminal fix
Richard Westhaver <ellis@rwest.io>
parents: 240
diff changeset
332
         #\u0010FFFF)))
240
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
333
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
334
 (defparameter *invalid-unicode-hash* (make-hash-table))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
335
 (dolist (char *invalid-unicode*)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
336
   (setf (gethash char *invalid-unicode-hash*) char))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
337
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
338
 (defun report-character-errors (stream data)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
339
   (loop for char across data
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
340
         when (gethash char *invalid-unicode-hash*)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
341
           do (push :invalid-codepoint (html5-stream-errors stream))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
342
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
343
 ;;; Tokenizer
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
344
 (defclass html-tokenizer ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
345
   ((stream :initarg :stream :reader tokenizer-stream)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
346
    (cdata-switch-helper :initarg :cdata-switch-helper
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
347
                         :initform (constantly nil))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
348
    (lowercase-element-name :initform t)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
349
    (lowercase-attr-name :initform t)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
350
    (escape-flag :initform nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
351
    (last-four-chars :initform nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
352
    (state :initform :data-state :accessor tokenizer-state)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
353
    (escape :initform nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
354
    (current-token :initform nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
355
    (token-queue :initform nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
356
    (temporary-buffer :initform nil)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
357
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
358
 (defun make-html-tokenizer (source &key encoding cdata-switch-helper)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
359
   (make-instance 'html-tokenizer
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
360
                  :stream (make-html-input-stream source :override-encoding encoding)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
361
                  :cdata-switch-helper cdata-switch-helper))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
362
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
363
 (defun map-tokens (tokenizer function)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
364
   "Return next token or NIL on eof"
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
365
   (with-slots (token-queue stream) tokenizer
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
366
     (loop while (run-state tokenizer) do
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
367
          (setf token-queue (nreverse token-queue))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
368
          (loop while (html5-stream-errors stream)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
369
             do (funcall function (list :type :parse-error :data (pop (html5-stream-errors stream)))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
370
          (loop while token-queue
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
371
             do (funcall function (pop token-queue))))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
372
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
373
 (defun run-state (tokenizer)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
374
   (run-state* tokenizer (slot-value tokenizer 'state)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
375
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
376
 (defgeneric run-state* (tokenizer state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
377
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
378
 (defmacro defstate (state (&rest slots) &body body)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
379
   `(defmethod run-state* (self (state (eql ,state)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
380
      (with-slots (,@slots) self
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
381
        (block nil
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
382
          ,@body
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
383
          t))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
384
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
385
 (defun push-token (self token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
386
   (with-slots (token-queue) self
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
387
     (push token token-queue)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
388
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
389
 (defun make-growable-string (&optional (init ""))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
390
   "Make an adjustable string with a fill pointer.
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
391
 Given INIT, a string, return an adjustable version of it with the fill
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
392
 pointer at the end."
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
393
   (let ((string
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
394
           (make-array (max 5 (length init))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
395
                       :element-type 'character
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
396
                       :adjustable t
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
397
                       :fill-pointer (length init))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
398
     (when init
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
399
       (replace string init))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
400
     string))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
401
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
402
 (defun nconcat (string &rest data)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
403
   "Destructively concatenate DATA, string designators, to STRING."
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
404
   (declare (optimize speed))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
405
   (unless (array-has-fill-pointer-p string)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
406
     (setf string (make-growable-string string)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
407
   (labels ((conc (string x)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
408
              (typecase x
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
409
                (character
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
410
                 (vector-push-extend x string))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
411
                (string
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
412
                 (let ((len (length x)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
413
                   (loop for c across x do
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
414
                     (vector-push-extend c string len))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
415
                (symbol (conc string (string x))))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
416
     (dolist (x data string)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
417
       (conc string x))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
418
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
419
 (define-modify-macro nconcatf (&rest data) nconcat)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
420
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
421
 (defun push-token* (self type &rest data)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
422
   "Push a token with :type type and :data the a string concatenation of data"
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
423
   (push-token self (list :type type
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
424
                          :data (apply #'nconcat (make-growable-string) data))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
425
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
426
 (defun add-attribute (token name)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
427
   (setf (getf token :data) (append (getf token :data)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
428
                                    (list (cons (make-growable-string (string name))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
429
                                                (make-growable-string))))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
430
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
431
 (defun add-to-attr-name (token &rest data)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
432
   (setf (caar (last (getf token :data)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
433
         (apply #'nconcat
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
434
                (caar (last (getf token :data)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
435
                data)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
436
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
437
 (defun add-to-attr-value (token &rest data)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
438
   (setf (cdar (last (getf token :data)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
439
         (apply #'nconcat
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
440
                (cdar (last (getf token :data)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
441
                data)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
442
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
443
 (defun add-to (token indicator &rest data)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
444
   (setf (getf token indicator)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
445
         (apply #'nconcat
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
446
                (getf token indicator)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
447
                data)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
448
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
449
 (defun consume-number-entity (self is-hex)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
450
   "This function returns either U+FFFD or the character based on the
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
451
    decimal or hexadecimal representation. It also discards \";\" if present.
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
452
    If not present a token (:type :parse-error) is emitted.
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
453
   "
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
454
   (with-slots (stream) self
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
455
     (let ((allowed +digits+)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
456
           (radix 10)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
457
           (char-stack)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
458
           (c)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
459
           (char-as-int)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
460
           (char))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
461
       (when is-hex
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
462
         (setf allowed +hex-digits+)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
463
         (setf radix 16))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
464
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
465
       ;; Consume all the characters that are in range while making sure we
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
466
       ;; don't hit an EOF.
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
467
       (setf c (html5-stream-char stream))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
468
       (loop while (and (find c allowed) (not (eql c +eof+))) do
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
469
            (push c char-stack)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
470
            (setf c (html5-stream-char stream)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
471
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
472
       ;; Convert the set of characters consumed to an int.
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
473
       (setf char-as-int (parse-integer (coerce (nreverse char-stack) 'string) :radix radix))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
474
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
475
       ;; Certain characters get replaced with others
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
476
       (cond ((find char-as-int +replacement-characters+)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
477
              (setf char (getf +replacement-characters+ char-as-int))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
478
              (push-token self `(:type :parse-error
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
479
                                       :data :illegal-codepoint-for-numeric-entity
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
480
                                       :datavars '(:char-as-int ,char-as-int))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
481
             ((or (<= #xD800 char-as-int #xDFFF)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
482
                  (> char-as-int #x10FFFF))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
483
              (setf char #\uFFFD)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
484
              (push-token self `(:type :parse-error
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
485
                                       :data :illegal-codepoint-for-numeric-entity
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
486
                                       :datavars '(:char-as-int ,char-as-int))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
487
             (t
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
488
              ;; Python comment: Should speed up this check somehow (e.g. move the set to a constant)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
489
              (when (or (<= #x0001 char-as-int #x0008)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
490
                        (<= #x000E char-as-int #x001F)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
491
                        (<= #x007F char-as-int #x009F)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
492
                        (<= #xFDD0 char-as-int #xFDEF)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
493
                        (find char-as-int
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
494
                              #(#x000B #xFFFE #xFFFF #x1FFFE
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
495
                                #x1FFFF #x2FFFE #x2FFFF #x3FFFE
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
496
                                #x3FFFF #x4FFFE #x4FFFF #x5FFFE
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
497
                                #x5FFFF #x6FFFE #x6FFFF #x7FFFE
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
498
                                #x7FFFF #x8FFFE #x8FFFF #x9FFFE
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
499
                                #x9FFFF #xAFFFE #xAFFFF #xBFFFE
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
500
                                #xBFFFF #xCFFFE #xCFFFF #xDFFFE
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
501
                                #xDFFFF #xEFFFE #xEFFFF #xFFFFE
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
502
                                #xFFFFF #x10FFFE #x10FFFF)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
503
                (push-token self `(:type :parse-error
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
504
                                         :data :illegal-codepoint-for-numeric-entity
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
505
                                         :datavars '(:char-as-int ,char-as-int))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
506
              ;; Assume char-code-limit >= 1114112
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
507
              (setf char (code-char char-as-int))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
508
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
509
       ;; Discard the ; if present. Otherwise, put it back on the queue and
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
510
       ;; invoke parseError on parser.
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
511
       (unless (eql c #\;)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
512
         (push-token self `(:type :parse-error :data :numeric-entity-without-semicolon))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
513
         (html5-stream-unget stream c))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
514
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
515
       (string char))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
516
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
517
 (defun consume-entity (self &key allowed-char from-attribute)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
518
   (with-slots (stream current-token) self
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
519
     (let ((output "&")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
520
           (stack (list (html5-stream-char stream))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
521
       (cond ((or (find (car stack) +space-characters+)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
522
                  (find (car stack) '(+eof+ #\< #\&))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
523
                  (and allowed-char (eql allowed-char (car stack))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
524
              (html5-stream-unget stream (car stack)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
525
             ((eql (car stack) #\#)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
526
              (push (html5-stream-char stream) stack)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
527
              (let ((is-hex (find (car stack) "xX")))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
528
                (when is-hex
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
529
                  (push (html5-stream-char stream) stack))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
530
                (cond ((find (car stack) (if is-hex +hex-digits+ +digits+))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
531
                       (html5-stream-unget stream (car stack))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
532
                       (setf output (consume-number-entity self is-hex)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
533
                      (t
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
534
                       (push-token self '(:type :parse-error :data :expected-numeric-entity))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
535
                       (html5-stream-unget stream (pop stack))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
536
                       (when is-hex
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
537
                         (html5-stream-unget stream (pop stack)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
538
                       (html5-stream-unget stream (pop stack))))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
539
             (t
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
540
              ;; Consume the maximum number of characters possible, with the
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
541
              ;; consumed characters matching one of the identifiers in the first
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
542
              ;; column of the named character references table
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
543
              ;; (in a case-sensitive manner).
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
544
              (let ((entity)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
545
                    (match-at 0))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
546
                (loop with node = *entities-tree*
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
547
                      for char = (car stack) then (car (push (html5-stream-char stream)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
548
                                                             stack))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
549
                      for next-node = (assoc char node)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
550
                      while next-node
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
551
                      do (when (second next-node)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
552
                           (setf entity (second next-node))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
553
                           (setf match-at (length stack)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
554
                      do (setf node (cddr next-node)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
555
                (let ((next-char))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
556
                  ;; Unconsume those characters that are not part of the match
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
557
                  ;; This unconsumes everything if there where no match
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
558
                  (loop until (= (length stack) match-at) do
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
559
                       (setf next-char (car stack))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
560
                       (html5-stream-unget stream (pop stack)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
561
                  (cond ((not entity)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
562
                         ;; If no match can be made, then no characters are consumed, and nothing is returned.
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
563
                         ;; Is this always a parse error really?
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
564
                         (push-token self '(:type :parse-error :data :expected-named-entity)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
565
                        ((and from-attribute
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
566
                              (not (eql #\; (car stack)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
567
                              (or (eql next-char #\=)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
568
                                  (find next-char +digits+)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
569
                                  (ascii-letter-p next-char)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
570
                         ; Is this a parse error really?
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
571
                         (push-token self '(:type :parse-error :data :bogus))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
572
                         (setf output (concatenate 'string "&" (reverse stack))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
573
                        (t
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
574
                         (unless (eql #\; (car stack))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
575
                           (push-token self '(:type :parse-error
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
576
                                              :data :named-entity-without-semicolon)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
577
                         (setf output entity)))))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
578
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
579
       (cond (from-attribute
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
580
              (add-to-attr-value current-token output))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
581
             (t
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
582
              (push-token* self (if (find (char output 0) +space-characters+)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
583
                                    :space-characters
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
584
                                    :characters)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
585
                           output))))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
586
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
587
 (defun process-entity-in-attribute (self &key allowed-char)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
588
   (consume-entity self :allowed-char allowed-char :from-attribute t))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
589
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
590
 (defun emit-current-token (self)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
591
   "This method is a generic handler for emitting the tags. It also sets
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
592
    the state to :data because that's what's needed after a token has been
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
593
    emitted.
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
594
   "
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
595
   (with-slots (current-token state lowercase-element-name) self
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
596
     (let ((token current-token))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
597
       ;; Add token to the queue to be yielded
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
598
       (when (find (getf token :type) +tag-token-types+)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
599
         (when lowercase-element-name
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
600
           (setf (getf token :name) (ascii-upper-2-lower (getf token :name))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
601
         (when (eql (getf token :type) :end-tag)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
602
           (when (getf token :data)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
603
             (push-token self '(:type :parse-error :data :attributes-in-end-tag)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
604
           (when (getf token :self-closing)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
605
             (push-token self '(:type :parse-error :data :self-closing-flag-on-end-tag)))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
606
       (push-token self token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
607
       (setf state :data-state))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
608
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
609
 ;;;
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
610
 ;;; Below are the various tokenizer states worked out.
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
611
 ;;;
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
612
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
613
 (defstate :data-state (stream state)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
614
   (let ((data (html5-stream-char stream)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
615
     (cond ((eql data #\&)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
616
            (setf state :entity-data-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
617
           ((eql data #\<)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
618
            (setf state :tag-open-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
619
           ((eql data #\u0000)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
620
            (push-token self '(:type :parse-error :data :invalid-codepoint))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
621
            (push-token* self :characters #\u0000))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
622
           ((eql data +eof+)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
623
            ;; Tokenization ends.
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
624
            (return nil))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
625
           ((find data +space-characters+)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
626
            ;; Directly after emitting a token you switch back to the "data
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
627
            ;; state". At that point spaceCharacters are important so they are
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
628
            ;; emitted separately.
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
629
            (push-token* self :space-characters
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
630
                         data
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
631
                         (html5-stream-chars-until stream +space-characters+ t))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
632
            ;; No need to update lastFourChars here, since the first space will
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
633
            ;; have already been appended to lastFourChars and will have broken
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
634
            ;; any <!-- or --> sequences
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
635
            )
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
636
           (t
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
637
            (push-token* self :characters
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
638
                         data
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
639
                         (html5-stream-chars-until stream '(#\& #\< #\u0000)))))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
640
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
641
 (defstate :entity-data-state (state)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
642
   (consume-entity self)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
643
   (setf state :data-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
644
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
645
 (defstate :rcdata-state (stream state)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
646
   (let ((data (html5-stream-char stream)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
647
     (cond ((eql data #\&)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
648
            (setf state :character-reference-in-rcdata))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
649
           ((eql data #\<)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
650
            (setf state :rcdata-less-than-sign-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
651
           ((eql data +eof+)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
652
            ;; Tokenization ends.
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
653
            (return nil))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
654
           ((eql data #\u0000)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
655
            (push-token self '(:type :parse-error :data :invalid-codepoint))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
656
            (push-token* self :characters #\uFFFD))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
657
           ((find data +space-characters+)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
658
            ;; Directly after emitting a token you switch back to the "data
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
659
            ;; state". At that point spaceCharacters are important so they are
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
660
            ;; emitted separately.
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
661
            (push-token* self :space-characters
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
662
                         data
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
663
                         (html5-stream-chars-until stream +space-characters+ t))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
664
            ;; No need to update lastFourChars here, since the first space will
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
665
            ;; have already been appended to lastFourChars and will have broken
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
666
            ;; any <!-- or --> sequences
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
667
            )
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
668
           (t
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
669
            (push-token* self :characters
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
670
                         data
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
671
                         (html5-stream-chars-until stream '(#\& #\<)))))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
672
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
673
 (defstate :character-reference-in-rcdata (state)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
674
   (consume-entity self)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
675
   (setf state :rcdata-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
676
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
677
 (defstate :rawtext-state (stream state)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
678
   (let ((data (html5-stream-char stream)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
679
     (cond ((eql data #\<)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
680
            (setf state :rawtext-less-than-sign-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
681
           ((eql data #\u0000)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
682
            (push-token self '(:type :parse-error :data :invalid-codepoint))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
683
            (push-token* self :characters #\uFFFD))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
684
           ((eql data +eof+)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
685
            ;; Tokenization ends.
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
686
            (return nil))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
687
           (t
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
688
            (push-token* self :characters
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
689
                         data
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
690
                         (html5-stream-chars-until stream '(#\< #\u0000)))))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
691
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
692
 (defstate :script-data-state (stream state)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
693
   (let ((data (html5-stream-char stream)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
694
     (cond ((eql data #\<)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
695
            (setf state :script-data-less-than-sign-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
696
           ((eql data #\u0000)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
697
            (push-token self '(:type :parse-error :data :invalid-codepoint))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
698
            (push-token* self :characters #\uFFFD))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
699
           ((eql data +eof+)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
700
            ;; Tokenization ends.
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
701
            (return nil))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
702
           (t
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
703
            (push-token* self :characters
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
704
                         data
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
705
                         (html5-stream-chars-until stream '(#\< #\u0000)))))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
706
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
707
 (defstate :plaintext-state (stream)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
708
   (let ((data (html5-stream-char stream)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
709
     (cond ((eql data +eof+)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
710
            ;; Tokenization ends.
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
711
            (return nil))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
712
           ((eql data #\u0000)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
713
            (push-token self '(:type :parse-error :data :invalid-codepoint))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
714
            (push-token* self :characters #\uFFFD))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
715
           (t
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
716
            (push-token* self :characters
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
717
                         data
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
718
                         (html5-stream-chars-until stream '(#\u0000)))))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
719
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
720
 (defstate :tag-open-state (stream state current-token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
721
   (let ((data (html5-stream-char stream)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
722
     (cond ((eql data #\!)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
723
             (setf state :markup-declaration-open-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
724
           ((eql data #\/)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
725
             (setf state :close-tag-open-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
726
           ((ascii-letter-p data)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
727
            (setf current-token (list :type :start-tag
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
728
                                      :name (make-array 1 :element-type 'character
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
729
                                                           :initial-element data
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
730
                                                           :fill-pointer t
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
731
                                                           :adjustable t)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
732
                                      :data '()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
733
                                      :self-closing nil
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
734
                                      :self-closing-acknowledged nil))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
735
             (setf state :tag-name-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
736
           ((eql data #\>)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
737
            ;; XXX In theory it could be something besides a tag name. But
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
738
            ;; do we really care?
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
739
            (push-token self '(:type :parse-error :data :expected-tag-name-but-got-right-bracket))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
740
            (push-token* self :characters "<>")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
741
            (setf state :data-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
742
           ((eql data #\?)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
743
            ;; XXX In theory it could be something besides a tag name. But
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
744
            ;; do we really care?
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
745
            (push-token self '(:type :parse-error :data :expected-tag-name-but-got-question-mark))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
746
            (html5-stream-unget stream data)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
747
            (setf state :bogus-comment-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
748
           (t
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
749
            ;; XXX
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
750
            (push-token self '(:type :parse-error :data :expected-tag-name))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
751
            (push-token* self :characters "<")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
752
            (html5-stream-unget stream data)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
753
            (setf state :data-state)))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
754
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
755
 (defstate :close-tag-open-state
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
756
     (stream state current-token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
757
   (let ((data (html5-stream-char stream)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
758
     (cond ((ascii-letter-p data)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
759
            (setf current-token (list :type :end-tag
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
760
                                      :name (make-array 1 :element-type 'character
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
761
                                                           :initial-element data
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
762
                                                           :fill-pointer t
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
763
                                                           :adjustable t)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
764
                                      :data '()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
765
                                      :self-closing nil))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
766
            (setf state :tag-name-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
767
           ((eql data #\>)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
768
            (push-token self '(:type :parse-error :data :expected-closing-tag-but-got-right-bracket))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
769
            (setf state :data-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
770
           ((eql data +eof+)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
771
            (push-token self '(:type :parse-error :data :expected-closing-tag-but-got-eof))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
772
            (push-token* self :characters "</")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
773
            (setf state :data-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
774
           (t
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
775
            ;; XXX data can be _'_...
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
776
            (push-token self `(:type :parse-error :data :expected-closing-tag-but-got-char
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
777
                                     :datavars (:data ,data)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
778
            (html5-stream-unget stream data)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
779
            (setf state :bogus-comment-state))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
780
   t)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
781
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
782
 (defstate :tag-name-state (stream state current-token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
783
   (let ((data (html5-stream-char stream)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
784
     (cond ((find data +space-characters+)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
785
            (setf state :before-attribute-name-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
786
           ((eql data #\>)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
787
            (emit-current-token self))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
788
           ((eql data +eof+)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
789
            (push-token self '(:type :parse-error :data :eof-in-tag-name))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
790
            (setf state :data-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
791
           ((eql data #\/)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
792
            (setf state :self-closing-start-tag-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
793
           ((eql data #\u0000)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
794
            (push-token self '(:type :parse-error :data :invalid-codepoint))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
795
            (vector-push-extend #\uFFFD (getf current-token :name)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
796
           (t
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
797
            (vector-push-extend data (getf current-token :name))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
798
            ;; (Don't use charsUntil here, because tag names are
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
799
            ;; very short and it's faster to not do anything fancy)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
800
            ))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
801
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
802
 (defstate :rcdata-less-than-sign-state (stream state temporary-buffer)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
803
   (let ((data (html5-stream-char stream)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
804
     (cond ((eql data #\/)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
805
            (setf temporary-buffer (make-growable-string))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
806
            (setf state :rcdata-end-tag-open-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
807
           (t
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
808
            (push-token* self :characters "<")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
809
            (html5-stream-unget stream data)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
810
            (setf state :rcdata-state)))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
811
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
812
 (defstate :rcdata-end-tag-open-state (stream state temporary-buffer)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
813
   (let ((data (html5-stream-char stream)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
814
     (cond ((ascii-letter-p data)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
815
            (nconcatf temporary-buffer (string data))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
816
            (setf state :rcdata-end-tag-name-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
817
           (t
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
818
            (push-token* self :characters "</")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
819
            (html5-stream-unget stream data)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
820
            (setf state :rcdata-state)))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
821
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
822
 (defstate :rcdata-end-tag-name-state (stream state temporary-buffer current-token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
823
   (let ((appropriate (and current-token
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
824
                           (string-equal (getf current-token :name)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
825
                                         temporary-buffer)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
826
         (data (html5-stream-char stream)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
827
     (cond ((and (find data +space-characters+)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
828
                 appropriate)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
829
            (setf current-token (list :type :end-tag
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
830
                                      :name temporary-buffer
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
831
                                      :data '()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
832
                                      :self-closing nil))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
833
            (setf state :before-attribute-name-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
834
           ((and (eql data #\/)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
835
                 appropriate)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
836
            (setf current-token (list :type :end-tag
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
837
                                      :name temporary-buffer
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
838
                                      :data '()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
839
                                      :self-closing nil))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
840
            (setf state :self-closing-start-tag-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
841
           ((and (eql data #\>)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
842
                 appropriate)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
843
            (setf current-token (list :type :end-tag
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
844
                                      :name temporary-buffer
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
845
                                      :data '()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
846
                                      :self-closing nil))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
847
            (emit-current-token self)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
848
            (setf state :data-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
849
           ((ascii-letter-p data)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
850
            (nconcatf temporary-buffer data))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
851
           (t
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
852
            (push-token* self :characters "</" temporary-buffer)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
853
            (html5-stream-unget stream data)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
854
            (setf state :rcdata-state)))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
855
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
856
 (defstate :rawtext-less-than-sign-state (stream state temporary-buffer)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
857
   (let ((data (html5-stream-char stream)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
858
     (cond ((eql data #\/)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
859
            (setf temporary-buffer (make-growable-string))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
860
            (setf state :rawtext-end-tag-open-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
861
           (t
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
862
            (push-token* self :characters "<")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
863
            (html5-stream-unget stream data)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
864
            (setf state :rawtext-state)))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
865
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
866
 (defstate :rawtext-end-tag-open-state (stream state temporary-buffer)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
867
   (let ((data (html5-stream-char stream)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
868
     (cond ((ascii-letter-p data)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
869
            (nconcatf temporary-buffer (string data))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
870
            (setf state :rawtext-end-tag-name-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
871
           (t
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
872
            (push-token* self :characters "</")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
873
            (html5-stream-unget stream data)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
874
            (setf state :rawtext-state)))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
875
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
876
 (defstate :rawtext-end-tag-name-state (stream state temporary-buffer current-token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
877
   (let ((appropriate (and current-token
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
878
                           (string-equal (getf current-token :name)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
879
                                         temporary-buffer)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
880
         (data (html5-stream-char stream)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
881
     (cond ((and (find data +space-characters+)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
882
                 appropriate)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
883
            (setf current-token (list :type :end-tag
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
884
                                      :name temporary-buffer
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
885
                                      :data '()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
886
                                      :self-closing nil))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
887
            (setf state :before-attribute-name-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
888
           ((and (eql data #\/)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
889
                 appropriate)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
890
            (setf current-token (list :type :end-tag
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
891
                                      :name temporary-buffer
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
892
                                      :data '()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
893
                                      :self-closing nil))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
894
            (setf state :self-closing-start-tag-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
895
           ((and (eql data #\>)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
896
                 appropriate)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
897
            (setf current-token (list :type :end-tag
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
898
                                      :name temporary-buffer
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
899
                                      :data '()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
900
                                      :self-closing nil))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
901
            (emit-current-token self)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
902
            (setf state :data-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
903
           ((ascii-letter-p data)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
904
            (nconcatf temporary-buffer data))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
905
           (t
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
906
            (push-token* self :characters "</" temporary-buffer)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
907
            (html5-stream-unget stream data)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
908
            (setf state :rawtext-state)))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
909
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
910
 (defstate :script-data-less-than-sign-state (stream state temporary-buffer)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
911
   (let ((data (html5-stream-char stream)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
912
     (cond ((eql data #\/)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
913
            (setf temporary-buffer (make-growable-string))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
914
            (setf state :script-data-end-tag-open-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
915
           ((eql data #\!)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
916
            (push-token* self :characters "<!")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
917
            (setf state :script-data-escape-start-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
918
           (t
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
919
            (push-token* self :characters "<")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
920
            (html5-stream-unget stream data)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
921
            (setf state :script-data-state)))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
922
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
923
 (defstate :script-data-end-tag-open-state (stream state temporary-buffer)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
924
   (let ((data (html5-stream-char stream)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
925
     (cond ((ascii-letter-p data)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
926
            (nconcatf temporary-buffer data)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
927
            (setf state :script-data-end-tag-name-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
928
           (t
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
929
            (push-token* self :characters "</")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
930
            (html5-stream-unget stream data)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
931
            (setf state :script-data-state)))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
932
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
933
 (defstate :script-data-end-tag-name-state (stream state temporary-buffer current-token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
934
   (let ((appropriate (and current-token
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
935
                           (string-equal (getf current-token :name)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
936
                                         temporary-buffer)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
937
         (data (html5-stream-char stream)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
938
     (cond ((and (find data +space-characters+)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
939
                 appropriate)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
940
            (setf current-token (list :type :end-tag
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
941
                                      :name temporary-buffer
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
942
                                      :data '()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
943
                                      :self-closing nil))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
944
            (setf state :before-attribute-name-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
945
           ((and (eql data #\/)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
946
                 appropriate)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
947
            (setf current-token (list :type :end-tag
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
948
                                      :name temporary-buffer
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
949
                                      :data '()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
950
                                      :self-closing nil))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
951
            (setf state :self-closing-start-tag-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
952
           ((and (eql data #\>)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
953
                 appropriate)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
954
            (setf current-token (list :type :end-tag
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
955
                                      :name temporary-buffer
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
956
                                      :data '()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
957
                                      :self-closing nil))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
958
            (emit-current-token self)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
959
            (setf state :data-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
960
           ((ascii-letter-p data)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
961
            (nconcatf temporary-buffer data))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
962
           (t
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
963
            (push-token* self :characters "</" temporary-buffer)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
964
            (html5-stream-unget stream data)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
965
            (setf state :script-data-state)))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
966
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
967
 (defstate :script-data-escape-start-state (stream state)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
968
   (let ((data (html5-stream-char stream)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
969
     (cond ((eql data #\-)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
970
            (push-token* self :characters "-")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
971
            (setf state :script-data-escape-start-dash-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
972
           (t
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
973
            (html5-stream-unget stream data)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
974
            (setf state :script-data-state)))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
975
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
976
 (defstate :script-data-escape-start-dash-state (stream state)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
977
   (let ((data (html5-stream-char stream)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
978
     (cond ((eql data #\-)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
979
            (push-token* self :characters "-")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
980
            (setf state :script-data-escaped-dash-dash-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
981
           (t
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
982
            (html5-stream-unget stream data)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
983
            (setf state :script-data-state)))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
984
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
985
 (defstate :script-data-escaped-state (stream state)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
986
   (let ((data (html5-stream-char stream)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
987
     (cond ((eql data #\-)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
988
            (push-token* self :characters "-")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
989
            (setf state :script-data-escaped-dash-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
990
           ((eql data #\<)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
991
            (setf state :script-data-escaped-less-than-sign-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
992
           ((eql data #\u0000)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
993
            (push-token self '(:type :parse-error :data :invalid-codepoint))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
994
            (push-token* self :characters #\uFFFD))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
995
           ((eql data +eof+)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
996
            (setf state :data-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
997
           (t
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
998
            (push-token* self :characters data (html5-stream-chars-until stream '(#\< #\- #\u0000)))))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
999
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1000
 (defstate :script-data-escaped-dash-state (stream state)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1001
   (let ((data (html5-stream-char stream)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1002
     (cond ((eql data #\-)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1003
            (push-token* self :characters "-")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1004
            (setf state :script-data-escaped-dash-dash-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1005
           ((eql data #\<)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1006
            (setf state :script-data-escaped-less-than-sign-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1007
           ((eql data #\u0000)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1008
            (push-token self '(:type :parse-error :data :invalid-codepoint))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1009
            (push-token* self :characters #\uFFFD)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1010
            (setf state :script-data-escaped-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1011
           ((eql data +eof+)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1012
            (setf state :data-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1013
           (t
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1014
            (push-token* self :characters data (html5-stream-chars-until stream '(#\< #\- #\u0000)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1015
            (setf state :script-data-escaped-state)))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1016
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1017
 (defstate :script-data-escaped-dash-dash-state (stream state)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1018
   (let ((data (html5-stream-char stream)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1019
     (cond ((eql data #\-)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1020
            (push-token* self :characters "-"))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1021
           ((eql data #\<)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1022
            (setf state :script-data-escaped-less-than-sign-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1023
           ((eql data #\>)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1024
            (push-token* self :characters ">")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1025
            (setf state :script-data-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1026
           ((eql data #\u0000)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1027
            (push-token self '(:type :parse-error :data :invalid-codepoint))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1028
            (push-token* self :characters #\uFFFD)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1029
            (setf state :script-data-escaped-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1030
           ((eql data +eof+)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1031
            (setf state :data-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1032
           (t
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1033
            (push-token* self :characters data (html5-stream-chars-until stream '(#\< #\- #\u0000)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1034
            (setf state :script-data-escaped-state)))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1035
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1036
 (defstate :script-data-escaped-less-than-sign-state (stream state temporary-buffer)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1037
   (let ((data (html5-stream-char stream)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1038
     (cond ((eql data #\/)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1039
            (setf temporary-buffer (make-growable-string))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1040
            (setf state :script-data-escaped-end-tag-open-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1041
           ((ascii-letter-p data)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1042
            (push-token* self :characters "<" data)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1043
            (setf temporary-buffer (ascii-upper-2-lower (string data)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1044
            (setf state :script-data-double-escape-start-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1045
           (t
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1046
            (push-token* self :characters "<")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1047
            (html5-stream-unget stream data)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1048
            (setf state :script-data-escaped-state)))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1049
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1050
 (defstate :script-data-escaped-end-tag-open-state (stream state temporary-buffer)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1051
   (let ((data (html5-stream-char stream)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1052
     (cond ((ascii-letter-p data)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1053
            (setf temporary-buffer (string data))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1054
            (setf state :script-data-escaped-end-tag-name-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1055
           (t
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1056
            (push-token* self :characters "</")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1057
            (html5-stream-unget stream data)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1058
            (setf state :script-data-escaped-state)))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1059
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1060
 (defstate :script-data-escaped-end-tag-name-state (stream state temporary-buffer current-token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1061
   (let ((appropriate (and current-token
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1062
                           (string-equal (getf current-token :name)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1063
                                         temporary-buffer)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1064
         (data (html5-stream-char stream)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1065
     (cond ((and (find data +space-characters+)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1066
                 appropriate)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1067
            (setf current-token (list :type :end-tag
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1068
                                      :name temporary-buffer
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1069
                                      :data '()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1070
                                      :self-closing nil))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1071
            (setf state :before-attribute-name-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1072
           ((and (eql data #\/)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1073
                 appropriate)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1074
            (setf current-token (list :type :end-tag
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1075
                                      :name temporary-buffer
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1076
                                      :data '()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1077
                                      :self-closing nil))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1078
            (setf state :self-closing-start-tag-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1079
           ((and (eql data #\>)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1080
                 appropriate)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1081
            (setf current-token (list :type :end-tag
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1082
                                      :name temporary-buffer
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1083
                                      :data '()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1084
                                      :self-closing nil))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1085
            (emit-current-token self)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1086
            (setf state :data-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1087
           ((ascii-letter-p data)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1088
            (nconcatf temporary-buffer data))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1089
           (t
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1090
            (push-token* self :characters "</" temporary-buffer)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1091
            (html5-stream-unget stream data)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1092
            (setf state :script-data-escaped-state)))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1093
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1094
 (defstate :script-data-double-escape-start-state (stream state temporary-buffer)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1095
   (let ((data (html5-stream-char stream)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1096
     (cond ((or (find data +space-characters+)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1097
                (find data '(#\/ #\>)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1098
            (push-token* self :characters data)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1099
            (if (string= (string-downcase temporary-buffer) "script")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1100
                (setf state :script-data-double-escaped-state)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1101
                (setf state :script-data-escaped-state)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1102
           ((ascii-letter-p data)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1103
            (push-token* self :characters data)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1104
            (nconcatf temporary-buffer (string data)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1105
           (t
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1106
            (html5-stream-unget stream data)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1107
            (setf state :script-data-escaped-state)))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1108
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1109
 (defstate :script-data-double-escaped-state (stream state)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1110
   (let ((data (html5-stream-char stream)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1111
     (cond ((eql data #\-)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1112
            (push-token* self :characters "-")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1113
            (setf state :script-data-double-escaped-dash-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1114
           ((eql data #\<)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1115
            (push-token* self :characters "<")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1116
            (setf state :script-data-double-escaped-less-than-sign-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1117
           ((eql data #\u0000)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1118
            (push-token self '(:type :parse-error :data :invalid-codepoint))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1119
            (push-token* self :characters #\uFFFD))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1120
           ((eql data +eof+)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1121
            (push-token self '(:type :parse-error :data :eof-in-script-in-script))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1122
            (setf state :data-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1123
           (t
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1124
            (push-token* self :characters data)))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1125
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1126
 (defstate :script-data-double-escaped-dash-state (stream state)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1127
   (let ((data (html5-stream-char stream)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1128
     (cond ((eql data #\-)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1129
            (push-token* self :characters "-")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1130
            (setf state :script-data-double-escaped-dash-dash-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1131
           ((eql data #\<)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1132
            (push-token* self :characters "<")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1133
            (setf state :script-data-double-escaped-less-than-sign-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1134
           ((eql data #\u0000)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1135
            (push-token self '(:type :parse-error :data :invalid-codepoint))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1136
            (push-token* self :characters #\uFFFD)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1137
            (setf state :script-data-double-escaped-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1138
           ((eql data +eof+)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1139
            (push-token self '(:type :parse-error :data :eof-in-script-in-script))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1140
            (setf state :data-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1141
           (t
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1142
            (push-token* self :characters data)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1143
            (setf state :script-data-double-escaped-state)))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1144
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1145
 ;; FIXME: Incorrectly named in Python code: scriptDataDoubleEscapedDashState (same the one above)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1146
 (defstate :script-data-double-escaped-dash-dash-state (stream state)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1147
   (let ((data (html5-stream-char stream)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1148
     (cond ((eql data #\-)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1149
            (push-token* self :characters "-")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1150
            (setf state :script-data-double-escaped-dash-dash-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1151
           ((eql data #\<)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1152
            (push-token* self :characters "<")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1153
            (setf state :script-data-double-escaped-less-than-sign-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1154
           ((eql data #\>)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1155
            (push-token* self :characters ">")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1156
            (setf state :script-data-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1157
           ((eql data #\u0000)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1158
            (push-token self '(:type :parse-error :data :invalid-codepoint))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1159
            (push-token* self :characters #\uFFFD)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1160
            (setf state :script-data-double-escaped-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1161
           ((eql data +eof+)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1162
            (push-token self '(:type :parse-error :data :eof-in-script-in-script))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1163
            (setf state :data-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1164
           (t
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1165
            (push-token* self :characters data)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1166
            (setf state :script-data-double-escaped-state)))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1167
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1168
 (defstate :script-data-double-escaped-less-than-sign-state (stream state temporary-buffer)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1169
   (let ((data (html5-stream-char stream)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1170
     (cond ((eql data #\/)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1171
            (push-token* self :characters "/")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1172
            (setf temporary-buffer (make-growable-string))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1173
            (setf state :script-data-double-escape-end-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1174
           (t
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1175
            (html5-stream-unget stream data)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1176
            (setf state :script-data-double-escaped-state)))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1177
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1178
 (defstate :script-data-double-escape-end-state (stream state temporary-buffer)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1179
   (let ((data (html5-stream-char stream)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1180
     (cond ((or (find data +space-characters+)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1181
                (find data '(#\/ #\>)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1182
            (push-token* self :characters data)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1183
            (if (string= (string-downcase temporary-buffer) "script")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1184
                (setf state :script-data-escaped-state)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1185
                (setf state :script-data-double-escaped-state)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1186
           ((ascii-letter-p data)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1187
            (push-token* self :characters data)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1188
            (nconcatf temporary-buffer data))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1189
           (t
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1190
            (html5-stream-unget stream data)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1191
            (setf state :script-data-double-escaped-state)))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1192
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1193
 (defstate :before-attribute-name-state (stream state current-token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1194
   (let ((data (html5-stream-char stream)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1195
     (cond ((find data +space-characters+)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1196
            (html5-stream-chars-until stream +space-characters+ t))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1197
           ((ascii-letter-p data)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1198
            (add-attribute current-token data)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1199
            (setf state :attribute-name-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1200
           ((eql data #\>)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1201
            (emit-current-token self))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1202
           ((eql data #\/)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1203
            (setf state :self-closing-start-tag-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1204
           ((find data '(#\' #\" #\= #\<))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1205
            (push-token self '(:type :parse-error :data :invalid-character-in-attribute-name))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1206
            (add-attribute current-token data)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1207
            (setf state :attribute-name-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1208
           ((eql data #\u0000)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1209
            (push-token self '(:type :parse-error :data :invalid-codepoint))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1210
            (add-attribute current-token #\uFFFD)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1211
            (setf state :attribute-name-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1212
           ((eql data +eof+)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1213
            (push-token self '(:type :parse-error :data :expected-attribute-name-but-got-eof))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1214
            (setf state :data-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1215
           (t
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1216
            (add-attribute current-token data)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1217
            (setf state :attribute-name-state)))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1218
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1219
 (defstate :attribute-name-state (stream state current-token lowercase-attr-name)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1220
   (let ((data (html5-stream-char stream))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1221
         (leaving-this-state t)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1222
         (emit-token nil))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1223
     (cond ((eql data #\=)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1224
            (setf state :before-attribute-value-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1225
           ((ascii-letter-p data)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1226
            (add-to-attr-name current-token data
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1227
                              (html5-stream-chars-until stream +ascii-letters+ t))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1228
            (setf leaving-this-state nil))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1229
           ((eql data #\>)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1230
            ;; XXX If we emit here the attributes are converted to a dict
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1231
            ;; without being checked and when the code below runs we error
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1232
            ;; because data is a dict not a list
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1233
            (setf emit-token t))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1234
           ((find data +space-characters+)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1235
            (setf state :after-attribute-name-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1236
           ((eql data #\/)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1237
            (setf state :self-closing-start-tag-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1238
           ((eql data #\u0000)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1239
            (push-token self '(:type :parse-error :data :invalid-codepoint))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1240
            (add-to-attr-name current-token #\uFFFD)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1241
            (setf leaving-this-state nil))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1242
           ((find data '(#\' #\" #\<))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1243
             (push-token self '(:type :parse-error :data :invalid-character-in-attribute-name))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1244
            (add-to-attr-name current-token data)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1245
            (setf leaving-this-state nil))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1246
           ((eql data +eof+)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1247
            (push-token self '(:type :parse-error :data :eof-in-attribute-name))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1248
            (setf state :data-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1249
           (t
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1250
            (add-to-attr-name current-token data)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1251
            (setf leaving-this-state nil)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1252
     (when leaving-this-state
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1253
       ;; Attributes are not dropped at this stage. That happens when the
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1254
       ;; start tag token is emitted so values can still be safely appended
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1255
       ;; to attributes, but we do want to report the parse error in time.
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1256
       (when lowercase-attr-name
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1257
         (setf (caar (last (getf current-token :data)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1258
               (ascii-upper-2-lower (caar (last (getf current-token :data))))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1259
       (loop for (name . value) in (butlast (getf current-token :data)) do
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1260
            (when (string= (caar (last (getf current-token :data))) name)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1261
              (push-token self '(:type :parse-error :data :duplicate-attribute))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1262
              (return)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1263
       ;; XXX Fix for above XXX
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1264
       (when emit-token
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1265
         (emit-current-token self)))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1266
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1267
 (defstate :after-attribute-name-state (stream state current-token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1268
   (let ((data (html5-stream-char stream)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1269
     (cond ((find data +space-characters+)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1270
            (html5-stream-chars-until stream +space-characters+ t))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1271
           ((eql data #\=)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1272
            (setf state :before-attribute-value-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1273
           ((eql data #\>)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1274
            (emit-current-token self))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1275
           ((ascii-letter-p data)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1276
            (add-attribute current-token data)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1277
            (setf state :attribute-name-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1278
           ((eql data #\/)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1279
            (setf state :self-closing-start-tag-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1280
           ((eql data #\u0000)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1281
            (push-token self '(:type :parse-error :data :invalid-codepoint))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1282
            (add-attribute current-token #\uFFFD)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1283
            (setf state :attribute-name-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1284
           ((find data '(#\' #\" #\<))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1285
            (push-token self '(:type :parse-error :data :invalid-character-after-attribute-name))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1286
            (add-attribute current-token data)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1287
            (setf state :attribute-name-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1288
           ((eql data +eof+)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1289
            (push-token self '(:type :parse-error :data :expected-end-of-tag-but-got-eof))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1290
            (setf state :data-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1291
           (t
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1292
            (add-attribute current-token data)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1293
            (setf state :attribute-name-state)))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1294
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1295
 (defstate :before-attribute-value-state (stream state current-token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1296
   (let ((data (html5-stream-char stream)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1297
     (cond ((find data +space-characters+)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1298
            (html5-stream-chars-until stream +space-characters+ t))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1299
           ((eql data #\")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1300
            (setf state :attribute-value-double-quoted-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1301
           ((eql data #\&)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1302
            (setf state :attribute-value-un-quoted-state)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1303
            (html5-stream-unget stream data))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1304
           ((eql data #\')
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1305
            (setf state :attribute-value-single-quoted-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1306
           ((eql data #\>)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1307
            (push-token self '(:type :parse-error :data :expected-attribute-value-but-got-right-bracket))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1308
            (emit-current-token self))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1309
           ((eql data #\u0000)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1310
            (push-token self '(:type :parse-error :data :invalid-codepoint))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1311
            (add-to-attr-value current-token #\uFFFD)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1312
            (setf state :attribute-value-un-quoted-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1313
           ((find data '(#\= #\< #\`))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1314
            (push-token self '(:type :parse-error :data :equals-in-unquoted-attribute-value))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1315
            (add-to-attr-value current-token data)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1316
            (setf state :attribute-value-un-quoted-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1317
           ((eql data +eof+)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1318
             (push-token self '(:type :parse-error :data :expected-attribute-value-but-got-eof))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1319
             (setf state :data-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1320
           (t
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1321
            (add-to-attr-value current-token data)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1322
            (setf state :attribute-value-un-quoted-state)))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1323
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1324
 (defstate :attribute-value-double-quoted-state (stream state current-token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1325
   (let ((data (html5-stream-char stream)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1326
     (cond ((eql data #\")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1327
            (setf state :after-attribute-value-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1328
           ((eql data #\&)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1329
            (process-entity-in-attribute self :allowed-char #\"))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1330
           ((eql data #\u0000)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1331
            (push-token self '(:type :parse-error :data :invalid-codepoint))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1332
            (add-to-attr-value current-token #\uFFFD))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1333
           ((eql data +eof+)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1334
             (push-token self '(:type :parse-error :data :eof-in-attribute-value-double-quote))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1335
             (setf state :data-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1336
           (t
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1337
            (add-to-attr-value current-token
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1338
                               data
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1339
                               (html5-stream-chars-until stream '(#\" #\&)))))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1340
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1341
 (defstate :attribute-value-single-quoted-state (stream state current-token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1342
   (let ((data (html5-stream-char stream)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1343
     (cond ((eql data #\')
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1344
            (setf state :after-attribute-value-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1345
           ((eql data #\&)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1346
            (process-entity-in-attribute self :allowed-char #\'))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1347
           ((eql data #\u0000)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1348
            (push-token self '(:type :parse-error :data :invalid-codepoint))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1349
            (add-to-attr-value current-token #\uFFFD))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1350
           ((eql data +eof+)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1351
             (push-token self '(:type :parse-error :data :eof-in-attribute-value-single-quote))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1352
             (setf state :data-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1353
           (t
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1354
            (add-to-attr-value current-token
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1355
                               data
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1356
                               (html5-stream-chars-until stream '(#\' #\&)))))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1357
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1358
 (defstate :attribute-value-un-quoted-state (stream state current-token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1359
   (let ((data (html5-stream-char stream)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1360
     (cond ((find data +space-characters+)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1361
            (setf state :before-attribute-name-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1362
           ((eql data #\&)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1363
            (process-entity-in-attribute self :allowed-char #\>))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1364
           ((eql data #\>)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1365
            (emit-current-token self))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1366
           ((find data '(#\" #\' #\= #\< #\`))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1367
            (push-token self '(:type :parse-error :data :unexpected-character-in-unquoted-attribute-value))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1368
            (add-to-attr-value current-token data))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1369
           ((eql data #\u0000)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1370
            (push-token self '(:type :parse-error :data :invalid-codepoint))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1371
            (add-to-attr-value current-token #\uFFFD))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1372
           ((eql data +eof+)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1373
            (push-token self '(:type :parse-error :data :eof-in-attribute-value-no-quotes))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1374
            (setf state :data-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1375
           (t
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1376
            (add-to-attr-value current-token
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1377
                               data
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1378
                               (html5-stream-chars-until stream `(#\& #\> #\" #\' #\= #\< #\`
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1379
                                                               ,@+space-characters+)))))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1380
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1381
 (defstate :after-attribute-value-state (stream state current-token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1382
   (let ((data (html5-stream-char stream)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1383
     (cond ((find data +space-characters+)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1384
            (setf state :before-attribute-name-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1385
           ((eql data #\>)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1386
            (emit-current-token self))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1387
           ((eql data #\/)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1388
            (setf state :self-closing-start-tag-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1389
           ((eql data +eof+)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1390
            (push-token self '(:type :parse-error :data :unexpected-EOF-after-attribute-value))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1391
            (html5-stream-unget stream data)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1392
            (setf state :data-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1393
           (t
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1394
            (push-token self '(:type :parse-error :data :unexpected-character-after-attribute-value))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1395
            (html5-stream-unget stream data)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1396
            (setf state :before-attribute-name-state)))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1397
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1398
 (defstate :self-closing-start-tag-state (stream state current-token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1399
   (let ((data (html5-stream-char stream)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1400
     (cond ((eql data #\>)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1401
            (setf (getf current-token :self-closing) t)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1402
            (emit-current-token self))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1403
           ((eql data +eof+)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1404
            (push-token self '(:type :parse-error :data :unexpected-EOF-after-solidus-in-tag))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1405
            (html5-stream-unget stream data)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1406
            (setf state :data-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1407
           (t
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1408
            (push-token self '(:type :parse-error :data :unexpected-character-after-soldius-in-tag))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1409
            (html5-stream-unget stream data)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1410
            (setf state :before-attribute-name-state)))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1411
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1412
 (defstate :bogus-comment-state (stream state current-token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1413
   ;; Make a new comment token and give it as value all the characters
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1414
   ;; until the first > or EOF (charsUntil checks for EOF automatically)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1415
   ;; and emit it.
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1416
   (let ((data (html5-stream-chars-until stream '(#\>))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1417
     (setf data (substitute #\uFFFD #\u0000 data))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1418
     (push-token* self :comment data)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1419
     ;; Eat the character directly after the bogus comment which is either a
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1420
     ;; ">" or an EOF.
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1421
     (html5-stream-char stream)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1422
     (setf state :data-state)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1423
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1424
 (defstate :markup-declaration-open-state (stream state current-token
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1425
                                                  cdata-switch-helper)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1426
   (let ((char-stack (make-array 1
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1427
                                 :initial-element (html5-stream-char stream)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1428
                                 :fill-pointer 1
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1429
                                 :adjustable t)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1430
     (cond ((eql (aref char-stack (1- (length char-stack))) #\-)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1431
            (vector-push-extend (html5-stream-char stream) char-stack)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1432
            (when (eql (aref char-stack (1- (length char-stack))) #\-)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1433
              (setf current-token (list :type :comment :data ""))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1434
              (setf state :comment-start-state)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1435
              (return t)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1436
           ((find (aref char-stack (1- (length char-stack))) '(#\d #\D))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1437
            (let ((matched t))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1438
              (loop for expected in '((#\o #\O) (#\c #\C) (#\t #\T) (#\y #\Y) (#\p #\P) (#\e #\E)) do
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1439
                   (vector-push-extend (html5-stream-char stream) char-stack)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1440
                   (unless (find (aref char-stack (1- (length char-stack))) expected)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1441
                     (setf matched nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1442
                     (return)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1443
              (when matched
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1444
                (setf current-token (list :type :doctype
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1445
                                          :name ""
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1446
                                          :public-id nil
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1447
                                          :system-id nil
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1448
                                          :correct t))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1449
                (setf state :doctype-state)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1450
                (return t))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1451
           ((and (eql (aref char-stack (1- (length char-stack))) #\[)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1452
                 (funcall cdata-switch-helper))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1453
            (let ((matched t))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1454
              (loop for expected across "CDATA[" do
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1455
                   (vector-push-extend (html5-stream-char stream) char-stack)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1456
                   (unless (eql (aref char-stack (1- (length char-stack))) expected)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1457
                     (setf matched nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1458
                     (return)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1459
              (when matched
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1460
                (setf state :cdata-section-state)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1461
                (return t)))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1462
     (push-token self '(:type :parse-error :data :expected-dashes-or-doctype))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1463
     (loop while (plusp (length char-stack)) do
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1464
          (html5-stream-unget stream (vector-pop char-stack)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1465
     (setf state :bogus-comment-state)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1466
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1467
 (defstate :comment-start-state (stream state current-token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1468
   (let ((data (html5-stream-char stream)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1469
     (cond ((eql data #\-)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1470
            (setf state :comment-start-dash-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1471
           ((eql data #\u0000)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1472
            (push-token self '(:type :parse-error :data :invalid-codepoint))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1473
            (add-to current-token :data #\uFFFD))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1474
           ((eql data #\>)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1475
            (push-token self '(:type :parse-error :data :incorrect-comment))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1476
            (push-token self current-token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1477
            (setf state :data-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1478
           ((eql data +eof+)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1479
            (push-token self '(:type :parse-error :data :eof-in-comment))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1480
            (push-token self current-token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1481
            (setf state :data-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1482
           (t
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1483
            (add-to current-token :data data)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1484
            (setf state :comment-state)))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1485
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1486
 (defstate :comment-start-dash-state (stream state current-token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1487
   (let ((data (html5-stream-char stream)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1488
     (cond ((eql data #\-)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1489
            (setf state :comment-end-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1490
           ((eql data #\u0000)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1491
            (push-token self '(:type :parse-error :data :invalid-codepoint))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1492
            (add-to current-token :data "-" #\uFFFD))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1493
           ((eql data #\>)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1494
            (push-token self '(:type :parse-error :data :incorrect-comment))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1495
            (push-token self current-token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1496
            (setf state :data-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1497
           ((eql data +eof+)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1498
            (push-token self '(:type :parse-error :data :eof-in-comment))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1499
            (push-token self current-token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1500
            (setf state :data-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1501
           (t
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1502
            (add-to current-token :data "-" data)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1503
            (setf state :comment-state)))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1504
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1505
 (defstate :comment-state (stream state current-token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1506
   (let ((data (html5-stream-char stream)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1507
     (cond ((eql data #\-)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1508
            (setf state :comment-end-dash-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1509
           ((eql data #\u0000)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1510
            (push-token self '(:type :parse-error :data :invalid-codepoint))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1511
            (add-to current-token :data #\uFFFD))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1512
           ((eql data +eof+)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1513
            (push-token self '(:type :parse-error :data :eof-in-comment))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1514
            (push-token self current-token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1515
            (setf state :data-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1516
           (t
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1517
            (add-to current-token :data data
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1518
                         (html5-stream-chars-until stream '(#\- #\u0000)))))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1519
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1520
 (defstate :comment-end-dash-state (stream state current-token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1521
   (let ((data (html5-stream-char stream)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1522
     (cond ((eql data #\-)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1523
            (setf state :comment-end-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1524
           ((eql data #\u0000)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1525
            (push-token self '(:type :parse-error :data :invalid-codepoint))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1526
            (add-to current-token :data "-" #\uFFFD))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1527
           ((eql data +eof+)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1528
            (push-token self '(:type :parse-error :data :eof-in-comment-end-dash))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1529
            (push-token self current-token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1530
            (setf state :data-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1531
           (t
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1532
            (add-to current-token :data "-" data)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1533
            (setf state :comment-state)))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1534
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1535
 (defstate :comment-end-state (stream state current-token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1536
   (let ((data (html5-stream-char stream)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1537
     (cond ((eql data #\>)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1538
            (push-token self current-token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1539
            (setf state :data-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1540
           ((eql data #\u0000)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1541
            (push-token self '(:type :parse-error :data :invalid-codepoint))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1542
            (add-to current-token :data "--" #\uFFFD)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1543
            (setf state :comment-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1544
           ((eql data #\!)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1545
            (push-token self '(:type :parse-error :data :unexpected-bang-after-double-dash-in-comment))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1546
            (setf state :comment-end-bang-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1547
           ((eql data #\-)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1548
            (push-token self '(:type :parse-error :data :unexpected-dash-after-double-dash-in-comment))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1549
            (add-to current-token :data data))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1550
           ((eql data +eof+)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1551
            (push-token self '(:type :parse-error :data :eof-in-comment-double-dash))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1552
            (push-token self current-token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1553
            (setf state :data-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1554
           (t
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1555
            ;; XXX
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1556
            (push-token self '(:type :parse-error :data :unexpected-char-in-comment))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1557
            (add-to current-token :data "--" data)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1558
            (setf state :comment-state)))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1559
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1560
 (defstate :comment-end-bang-state (stream state current-token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1561
   (let ((data (html5-stream-char stream)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1562
     (cond ((eql data #\>)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1563
            (push-token self current-token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1564
            (setf state :data-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1565
           ((eql data #\-)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1566
            (add-to current-token :data "--!")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1567
            (setf state :comment-end-dash-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1568
           ((eql data #\u0000)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1569
            (push-token self '(:type :parse-error :data :invalid-codepoint))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1570
            (add-to current-token :data "--!" #\uFFFD)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1571
            (setf state :comment-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1572
           ((eql data +eof+)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1573
            (push-token self '(:type :parse-error :data :eof-in-comment-end-bang-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1574
            (push-token self current-token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1575
            (setf state :data-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1576
           (t
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1577
            (add-to current-token :data "--!" data)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1578
            (setf state :comment-state)))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1579
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1580
 (defstate :doctype-state (stream state current-token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1581
   (let ((data (html5-stream-char stream)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1582
     (cond ((find data +space-characters+)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1583
            (setf state :before-doctype-name-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1584
           ((eql data +eof+)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1585
            (push-token self '(:type :parse-error :data :expected-doctype-name-but-got-eof))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1586
            (setf (getf current-token :correct) nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1587
            (push-token self current-token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1588
            (setf state :data-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1589
           (t
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1590
            (push-token self '(:type :parse-error :data :need-space-after-doctype))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1591
            (html5-stream-unget stream data)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1592
            (setf state :before-doctype-name-state)))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1593
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1594
 (defstate :before-doctype-name-state (stream state current-token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1595
   (let ((data (html5-stream-char stream)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1596
     (cond ((find data +space-characters+)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1597
            ;; pass
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1598
            )
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1599
           ((eql data #\>)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1600
            (push-token self '(:type :parse-error :data :expected-doctype-name-but-got-right-bracket))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1601
            (setf (getf current-token :correct) nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1602
            (push-token self current-token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1603
            (setf state :data-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1604
           ((eql data #\u0000)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1605
            (push-token self '(:type :parse-error :data :invalid-codepoint))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1606
            (add-to current-token :name #\uFFFD)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1607
            (setf state :doctype-name-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1608
           ((eql data +eof+)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1609
            (push-token self '(:type :parse-error :data :expected-doctype-name-but-got-eof))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1610
            (setf (getf current-token :correct) nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1611
            (push-token self current-token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1612
            (setf state :data-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1613
           (t
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1614
            (setf (getf current-token :name) (string data))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1615
            (setf state :doctype-name-state)))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1616
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1617
 (defstate :doctype-name-state (stream state current-token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1618
   (let ((data (html5-stream-char stream)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1619
     (cond ((find data +space-characters+)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1620
            (setf (getf current-token :name) (ascii-upper-2-lower (getf current-token :name)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1621
            (setf state :after-doctype-name-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1622
           ((eql data #\>)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1623
            (setf (getf current-token :name) (ascii-upper-2-lower (getf current-token :name)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1624
            (push-token self current-token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1625
            (setf state :data-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1626
           ((eql data #\u0000)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1627
            (push-token self '(:type :parse-error :data :invalid-codepoint))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1628
            (add-to current-token :name #\uFFFD)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1629
            (setf state :doctype-name-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1630
           ((eql data +eof+)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1631
            (push-token self '(:type :parse-error :data :eof-in-doctype-name))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1632
            (setf (getf current-token :correct) nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1633
            (setf (getf current-token :name) (ascii-upper-2-lower (getf current-token :name)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1634
            (push-token self current-token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1635
            (setf state :data-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1636
           (t
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1637
            (add-to current-token :name data)))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1638
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1639
 (defstate :after-doctype-name-state (stream state current-token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1640
   (let ((data (html5-stream-char stream)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1641
     (cond ((find data +space-characters+)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1642
            ;; pass
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1643
            )
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1644
           ((eql data #\>)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1645
            (push-token self current-token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1646
            (setf state :data-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1647
           ((eql data +eof+)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1648
            (setf (getf current-token :correct) nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1649
            (html5-stream-unget stream data)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1650
            (push-token self '(:type :parse-error :data :eof-in-doctype))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1651
            (push-token self current-token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1652
            (setf state :data-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1653
           (t
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1654
            (cond ((find data '(#\p #\P))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1655
                   (let ((matched t))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1656
                     (loop for expected in '((#\u #\U) (#\b #\B) (#\l #\L) (#\i #\I) (#\c #\C)) do
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1657
                          (setf data (html5-stream-char stream))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1658
                          (unless (find data expected)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1659
                            (setf matched nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1660
                            (return)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1661
                     (when matched
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1662
                       (setf state :after-doctype-public-keyword-state)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1663
                       (return t))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1664
                  ((find data '(#\s #\S))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1665
                   (let ((matched t))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1666
                     (loop for expected in '((#\y #\Y) (#\s #\S) (#\t #\T) (#\e #\E) (#\m #\M)) do
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1667
                          (setf data (html5-stream-char stream))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1668
                          (unless (find data expected)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1669
                            (setf matched nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1670
                            (return)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1671
                     (when matched
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1672
                       (setf state :after-doctype-system-keyword-state)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1673
                       (return t)))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1674
            ;; All the characters read before the current 'data' will be
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1675
            ;; [a-zA-Z], so they're garbage in the bogus doctype and can be
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1676
            ;; discarded; only the latest character might be '>' or EOF
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1677
            ;; and needs to be ungetted
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1678
            (html5-stream-unget stream data)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1679
            (push-token self `(:type :parse-error :data :expected-space-or-right-bracket-in-doctype
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1680
                                     :datavars (:data ,data)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1681
            (setf (getf current-token :correct) nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1682
            (setf state :bogus-doctype-state)))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1683
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1684
 (defstate :after-doctype-public-keyword-state (stream state current-token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1685
   (let ((data (html5-stream-char stream)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1686
     (cond ((find data +space-characters+)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1687
            (setf state :before-doctype-public-identifier-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1688
           ((find data '(#\' #\"))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1689
            (push-token self '(:type :parse-error :data :unexpected-char-in-doctype))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1690
            (html5-stream-unget stream data)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1691
            (setf state :before-doctype-public-identifier-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1692
           ((eql data +eof+)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1693
            (push-token self '(:type :parse-error :data :eof-in-doctype))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1694
            (setf (getf current-token :correct) nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1695
            (push-token self current-token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1696
            (setf state :data-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1697
           (t
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1698
            (html5-stream-unget stream data)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1699
            (setf state :before-doctype-public-identifier-state)))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1700
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1701
 (defstate :before-doctype-public-identifier-state (stream state current-token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1702
   (let ((data (html5-stream-char stream)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1703
     (cond ((find data +space-characters+)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1704
            ;; pass
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1705
            )
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1706
           ((eql data #\")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1707
            (setf (getf current-token :public-id) "")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1708
            (setf state :doctype-public-identifier-double-quoted-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1709
           ((eql data #\')
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1710
            (setf (getf current-token :public-id) "")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1711
            (setf state :doctype-public-identifier-single-quoted-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1712
           ((eql data #\>)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1713
            (push-token self '(:type :parse-error :data :unexpected-end-of-doctype))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1714
            (setf (getf current-token :correct) nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1715
            (push-token self current-token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1716
            (setf state :data-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1717
           ((eql data +eof+)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1718
            (push-token self '(:type :parse-error :data :eof-in-doctype))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1719
            (setf (getf current-token :correct) nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1720
            (push-token self current-token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1721
            (setf state :data-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1722
           (t
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1723
            (push-token self '(:type :parse-error :data :unexpected-char-in-doctype))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1724
            (setf (getf current-token :correct) nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1725
            (setf state :bogus-doctype-state)))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1726
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1727
 (defstate :doctype-public-identifier-double-quoted-state (stream state current-token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1728
   (let ((data (html5-stream-char stream)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1729
     (cond ((eql data #\")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1730
            (setf state :after-doctype-public-identifier-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1731
           ((eql data #\u0000)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1732
            (push-token self '(:type :parse-error :data :invalid-codepoint))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1733
            (add-to current-token :public-id #\uFFFD))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1734
           ((eql data #\>)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1735
            (push-token self '(:type :parse-error :data :unexpected-end-of-doctype))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1736
            (setf (getf current-token :correct) nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1737
            (push-token self current-token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1738
            (setf state :data-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1739
           ((eql data +eof+)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1740
            (push-token self '(:type :parse-error :data :eof-in-doctype))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1741
            (setf (getf current-token :correct) nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1742
            (push-token self current-token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1743
            (setf state :data-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1744
           (t
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1745
            (add-to current-token :public-id data)))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1746
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1747
 (defstate :doctype-public-identifier-single-quoted-state (stream state current-token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1748
   (let ((data (html5-stream-char stream)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1749
     (cond ((eql data #\')
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1750
            (setf state :after-doctype-public-identifier-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1751
           ((eql data #\u0000)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1752
            (push-token self '(:type :parse-error :data :invalid-codepoint))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1753
            (add-to current-token :public-id #\uFFFD))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1754
           ((eql data #\>)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1755
            (push-token self '(:type :parse-error :data :unexpected-end-of-doctype))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1756
            (setf (getf current-token :correct) nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1757
            (push-token self current-token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1758
            (setf state :data-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1759
           ((eql data +eof+)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1760
            (push-token self '(:type :parse-error :data :eof-in-doctype))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1761
            (setf (getf current-token :correct) nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1762
            (push-token self current-token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1763
            (setf state :data-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1764
           (t
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1765
            (add-to current-token :public-id data)))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1766
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1767
 (defstate :after-doctype-public-identifier-state (stream state current-token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1768
   (let ((data (html5-stream-char stream)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1769
     (cond ((find data +space-characters+)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1770
            (setf state :between-doctype-public-and-system-identifiers-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1771
           ((eql data #\>)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1772
            (push-token self current-token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1773
            (setf state :data-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1774
           ((eql data #\")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1775
            (push-token self '(:type :parse-error :data :unexpected-char-in-doctype))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1776
            (setf (getf current-token :system-id) "")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1777
            (setf state :doctype-system-identifier-double-quoted-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1778
           ((eql data #\')
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1779
            (push-token self '(:type :parse-error :data :unexpected-char-in-doctype))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1780
            (setf (getf current-token :system-id) "")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1781
            (setf state :doctype-system-identifier-single-quoted-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1782
           ((eql data +eof+)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1783
            (push-token self '(:type :parse-error :data :eof-in-doctype))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1784
            (setf (getf current-token :correct) nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1785
            (push-token self current-token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1786
            (setf state :data-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1787
           (t
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1788
            (push-token self '(:type :parse-error :data :unexpected-char-in-doctype))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1789
            (setf (getf current-token :correct) nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1790
            (setf state :bogus-doctype-state)))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1791
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1792
 (defstate :between-doctype-public-and-system-identifiers-state (stream state current-token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1793
   (let ((data (html5-stream-char stream)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1794
     (cond ((find data +space-characters+)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1795
            ;; pass
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1796
            )
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1797
           ((eql data #\>)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1798
            (push-token self current-token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1799
            (setf state :data-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1800
           ((eql data #\")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1801
            (setf (getf current-token :system-id) "")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1802
            (setf state :doctype-system-identifier-double-quoted-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1803
           ((eql data #\')
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1804
            (setf (getf current-token :system-id) "")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1805
            (setf state :doctype-system-identifier-single-quoted-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1806
           ((eql data +eof+)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1807
            (push-token self '(:type :parse-error :data :eof-in-doctype))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1808
            (setf (getf current-token :correct) nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1809
            (push-token self current-token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1810
            (setf state :data-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1811
           (t
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1812
            (push-token self '(:type :parse-error :data :unexpected-char-in-doctype))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1813
            (setf (getf current-token :correct) nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1814
            (setf state :bogus-doctype-state)))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1815
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1816
 (defstate :after-doctype-system-keyword-state (stream state current-token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1817
   (let ((data (html5-stream-char stream)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1818
     (cond ((find data +space-characters+)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1819
            (setf state :before-doctype-system-identifier-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1820
           ((find data '(#\' #\"))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1821
            (push-token self '(:type :parse-error :data :unexpected-char-in-doctype))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1822
            (html5-stream-unget stream data)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1823
            (setf state :before-doctype-system-identifier-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1824
           ((eql data +eof+)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1825
            (push-token self '(:type :parse-error :data :eof-in-doctype))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1826
            (setf (getf current-token :correct) nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1827
            (push-token self current-token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1828
            (setf state :data-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1829
           (t
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1830
            (html5-stream-unget stream data)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1831
            (setf state :before-doctype-system-identifier-state)))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1832
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1833
 (defstate :before-doctype-system-identifier-state (stream state current-token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1834
   (let ((data (html5-stream-char stream)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1835
     (cond ((find data +space-characters+)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1836
            ;; pass
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1837
            )
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1838
           ((eql data #\")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1839
            (setf (getf current-token :system-id) "")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1840
            (setf state :doctype-system-identifier-double-quoted-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1841
           ((eql data #\')
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1842
            (setf (getf current-token :system-id) "")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1843
            (setf state :doctype-system-identifier-single-quoted-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1844
           ((eql data #\>)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1845
            (push-token self '(:type :parse-error :data :unexpected-end-of-doctype))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1846
            (setf (getf current-token :correct) nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1847
            (push-token self current-token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1848
            (setf state :data-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1849
           ((eql data +eof+)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1850
            (push-token self '(:type :parse-error :data :eof-in-doctype))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1851
            (setf (getf current-token :correct) nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1852
            (push-token self current-token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1853
            (setf state :data-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1854
           (t
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1855
            (push-token self '(:type :parse-error :data :unexpected-char-in-doctype))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1856
            (setf (getf current-token :correct) nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1857
            (setf state :bogus-doctype-state)))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1858
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1859
 (defstate :doctype-system-identifier-double-quoted-state (stream state current-token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1860
   (let ((data (html5-stream-char stream)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1861
     (cond ((eql data #\")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1862
            (setf state :after-doctype-system-identifier-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1863
           ((eql data #\u0000)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1864
            (push-token self '(:type :parse-error :data :invalid-codepoint))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1865
            (add-to current-token :system-id #\uFFFD))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1866
           ((eql data #\>)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1867
            (push-token self '(:type :parse-error :data :unexpected-end-of-doctype))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1868
            (setf (getf current-token :correct) nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1869
            (push-token self current-token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1870
            (setf state :data-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1871
           ((eql data +eof+)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1872
            (push-token self '(:type :parse-error :data :eof-in-doctype))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1873
            (setf (getf current-token :correct) nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1874
            (push-token self current-token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1875
            (setf state :data-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1876
           (t
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1877
            (add-to current-token :system-id data)))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1878
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1879
 (defstate :doctype-system-identifier-single-quoted-state (stream state current-token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1880
   (let ((data (html5-stream-char stream)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1881
     (cond ((eql data #\')
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1882
            (setf state :after-doctype-system-identifier-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1883
           ((eql data #\u0000)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1884
            (push-token self '(:type :parse-error :data :invalid-codepoint))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1885
            (add-to current-token :system-id #\uFFFD))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1886
           ((eql data #\>)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1887
            (push-token self '(:type :parse-error :data :unexpected-end-of-doctype))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1888
            (setf (getf current-token :correct) nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1889
            (push-token self current-token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1890
            (setf state :data-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1891
           ((eql data +eof+)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1892
            (push-token self '(:type :parse-error :data :eof-in-doctype))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1893
            (setf (getf current-token :correct) nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1894
            (push-token self current-token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1895
            (setf state :data-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1896
           (t
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1897
            (add-to current-token :system-id data)))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1898
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1899
 (defstate :after-doctype-system-identifier-state (stream state current-token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1900
   (let ((data (html5-stream-char stream)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1901
     (cond ((find data +space-characters+)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1902
            ;; pass
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1903
            )
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1904
           ((eql data #\>)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1905
            (push-token self current-token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1906
            (setf state :data-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1907
           ((eql data +eof+)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1908
            (push-token self '(:type :parse-error :data :eof-in-doctype))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1909
            (setf (getf current-token :correct) nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1910
            (push-token self current-token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1911
            (setf state :data-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1912
           (t
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1913
            (push-token self '(:type :parse-error :data :unexpected-char-in-doctype))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1914
            (setf state :bogus-doctype-state)))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1915
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1916
 (defstate :bogus-doctype-state (stream state current-token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1917
   (let ((data (html5-stream-char stream)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1918
     (cond ((eql data #\>)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1919
            (push-token self current-token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1920
            (setf state :data-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1921
           ((eql data +eof+)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1922
            ;; XXX EMIT
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1923
            (html5-stream-unget stream data)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1924
            (push-token self current-token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1925
            (setf state :data-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1926
           (t
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1927
            ;; pass
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1928
            ))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1929
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1930
 (defstate :cdata-section-state (stream state current-token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1931
   (let ((data '()))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1932
     (loop
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1933
        (push (html5-stream-chars-until stream '(#\])) data)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1934
       (let ((char-stack '())
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1935
             (matched t))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1936
         (loop for expected across "]]>" do
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1937
              (push (html5-stream-char stream) char-stack)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1938
              (cond ((eql (car char-stack) +eof+)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1939
                     (pop char-stack)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1940
                     (setf data (append char-stack data))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1941
                     (return))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1942
                    ((not (eql (car char-stack) expected))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1943
                     (setf matched nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1944
                     (setf data (append char-stack data))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1945
                     (return))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1946
         (when matched
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1947
           (return))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1948
     (setf data (apply #'concatenate 'string (mapcar #'string (nreverse data))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1949
     ;; Deal with null here rather than in the parser
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1950
     (let ((null-count (count #\u0000 data)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1951
       (when (plusp null-count)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1952
         (push-token self '(:type :parse-error :data :invalid-codepoint))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1953
         (setf data (nsubstitute #\uFFFD #\u0000 data))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1954
     (when (plusp (length data))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1955
       (push-token* self :characters data))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1956
     (setf state :data-state)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1957
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1958
 ;;; simple-tree
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1959
 ;; A basic implementation of a DOM-core like thing
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1960
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1961
 (defclass node ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1962
   ((type :initform :node :allocation :class :reader node-type)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1963
    (name :initarg :name :initform nil :reader node-name)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1964
    (namespace :initarg :namespace :initform nil :reader node-namespace)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1965
    (parent :initform nil :reader node-parent)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1966
    (value :initform nil :initarg :value
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1967
           :accessor node-value)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1968
    (child-nodes :initform nil :accessor %node-child-nodes)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1969
    (last-child :initform nil :accessor last-child)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1970
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1971
 (defmethod (setf %node-child-nodes) :after (value (node node))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1972
   (setf (last-child node) (last value)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1973
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1974
 (defclass document (node)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1975
   ((type :initform :document :allocation :class)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1976
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1977
 (defclass document-fragment (document)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1978
   ((type :initform :document-fragment :allocation :class)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1979
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1980
 (defclass document-type (node)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1981
   ((type :initform :document-type :allocation :class)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1982
    (public-id :initarg :public-id :reader node-public-id)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1983
    (system-id :initarg :system-id :reader node-system-id)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1984
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1985
 (defclass text-node (node)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1986
   ((type :initform :text :allocation :class)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1987
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1988
 (defclass element (node)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1989
   ((type :initform :element :allocation :class)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1990
    (attributes :initform nil :accessor %node-attributes)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1991
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1992
 (defclass comment-node (node)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1993
   ((type :initform :comment :allocation :class)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1994
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1995
 ;;;
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1996
 ;;; Creating nodes
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1997
 ;;;
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1998
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1999
 (defun make-document ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2000
   (make-instance 'document))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2001
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2002
 (defun make-fragment (document)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2003
   (declare (ignore document))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2004
   (make-instance 'document-fragment))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2005
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2006
 (defun make-doctype (document name public-id system-id)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2007
   (declare (ignore document))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2008
   (make-instance 'document-type :name name :public-id public-id :system-id system-id))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2009
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2010
 (defun make-comment (document data)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2011
   (declare (ignore document))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2012
   (make-instance 'comment-node :value data))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2013
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2014
 (defun make-element (document name namespace)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2015
   (declare (ignore document))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2016
   (make-instance 'element :name name :namespace namespace))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2017
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2018
 (defun make-text-node (document data)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2019
   (declare (ignore document))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2020
   (make-instance 'text-node :value data))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2021
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2022
 ;;;
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2023
 ;;; Node methods
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2024
 ;;;
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2025
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2026
 (defun node-first-child (node)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2027
   (car (%node-child-nodes node)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2028
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2029
 (defun node-last-child (node)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2030
   (car (last-child node)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2031
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2032
 (defun node-previous-sibling (node)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2033
   (loop for (this next) on (%node-child-nodes (node-parent node))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2034
         when (eql next node) do (return this)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2035
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2036
 (defun node-next-sibling (node)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2037
   (loop for (this next) on (%node-child-nodes (node-parent node))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2038
         when (eql this node) do (return next)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2039
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2040
 (defun node-remove-child (node child)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2041
   (setf (%node-child-nodes node)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2042
         (remove child (%node-child-nodes node)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2043
   (setf (slot-value child 'parent) nil))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2044
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2045
 (defun node-append-child (node child)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2046
   (when (node-parent child)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2047
     (node-remove-child (node-parent child) child))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2048
   (setf (slot-value child 'parent) node)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2049
   (if (%node-child-nodes node)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2050
       (setf (last-child node)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2051
             (push child (cdr (last-child node))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2052
       (setf (%node-child-nodes node)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2053
             (list child)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2054
   (%node-child-nodes node))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2055
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2056
 (defun node-insert-before (node child insert-before)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2057
   (let ((child-nodes (%node-child-nodes node)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2058
     (setf (slot-value child 'parent) node)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2059
     (labels ((insert-before (child-nodes)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2060
                (cond ((endp child-nodes)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2061
                       (cons child nil))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2062
                      ((eql (car child-nodes) insert-before)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2063
                       (cons child child-nodes))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2064
                      (t (rplacd child-nodes (insert-before (cdr child-nodes)))))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2065
       (setf (%node-child-nodes node)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2066
             (insert-before child-nodes)))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2067
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2068
 (defun element-attribute (node attribute &optional namespace)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2069
   (cdr (assoc (cons attribute namespace)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2070
               (%node-attributes node)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2071
               :test #'equal)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2072
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2073
 (defun (setf element-attribute) (new-value node attribute
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2074
                                   &optional namespace)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2075
   (check-type attribute string)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2076
   (check-type new-value string)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2077
   (let ((old-attr (assoc (cons attribute namespace)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2078
                          (%node-attributes node)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2079
                          :test #'equal)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2080
     (if old-attr
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2081
         (setf (cdr old-attr) new-value)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2082
         (push (cons (cons attribute namespace) new-value) (%node-attributes node)))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2083
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2084
 ;;;
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2085
 ;;; Traversing
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2086
 ;;;
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2087
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2088
 (defun element-map-children (function node)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2089
   (map nil function (%node-child-nodes node)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2090
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2091
 (defun element-map-attributes* (function node)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2092
   (loop for ((name . namespace) . value) in (%node-attributes node)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2093
         do (funcall function name namespace value)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2094
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2095
 (defun element-map-attributes (function node)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2096
   (element-map-attributes*
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2097
    (lambda (name namespace value)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2098
      (funcall function
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2099
               (if namespace
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2100
                   (format nil "~A:~A" (find-prefix namespace) name)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2101
                   name)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2102
               namespace
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2103
               value))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2104
    node))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2105
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2106
 ;;
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2107
 ;; Printing for the ease of debugging
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2108
 ;;
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2109
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2110
 (defun node-count (tree)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2111
   (typecase tree
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2112
     (element (1+ (apply #'+ (mapcar #'node-count (%node-child-nodes tree)))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2113
     ((or document document-fragment)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2114
      (apply #'+ (mapcar #'node-count (%node-child-nodes tree))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2115
     (t 1)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2116
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2117
 (defmethod print-object ((node document) stream)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2118
   (print-unreadable-object (node stream :type t :identity t)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2119
     (format stream "nodes: ~A" (node-count node))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2120
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2121
 (defmethod print-object ((node node) stream)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2122
   (print-unreadable-object (node stream :type t :identity t)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2123
     (format stream "~A" (node-name node))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2124
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2125
 (defmethod print-object ((node text-node) stream)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2126
   (print-unreadable-object (node stream :type t :identity t)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2127
     (write (node-value node) :stream stream :length 30)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2128
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2129
 ;;; html5-parser-class
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2130
 (defvar *parser*)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2131
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2132
 (defclass html-parser ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2133
   ((html-namespace :initform (find-namespace "html"))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2134
    (strict :initarg :strict)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2135
    (inner-html-mode)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2136
    (container :initform "div")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2137
    (tokenizer)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2138
    (document :initform (make-document))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2139
    (errors :initform '())
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2140
    (phase :accessor parser-phase)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2141
    first-start-tag
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2142
    compat-mode
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2143
    inner-html
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2144
    last-phase
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2145
    original-phase
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2146
    before-rcdata-phase
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2147
    (character-tokens :initform nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2148
    frameset-ok
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2149
    open-elements
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2150
    active-formatting-elements
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2151
    head-pointer
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2152
    form-pointer
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2153
    insert-from-table
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2154
    (in-body-process-space-characters-mode :initform :non-pre)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2155
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2156
 ;;; tree-help
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2157
 (defmacro pop-end (place)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2158
   "Pop from the end of list"
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2159
   (let ((old-list (gensym)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2160
     `(let ((,old-list ,place))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2161
        (prog1 (car (last ,old-list))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2162
          (setf ,place (butlast ,old-list))))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2163
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2164
 (defmacro push-end (object place)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2165
   "Push to the end of list"
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2166
   `(progn
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2167
      ;(format t "~&push ~S to ~S" ',object ',place)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2168
      (setf ,place (append ,place (list ,object)))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2169
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2170
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2171
 (defvar *parser*)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2172
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2173
 (defun document* ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2174
   (slot-value *parser* 'document))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2175
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2176
 (defun node-clone* (node)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2177
   (ecase (node-type node)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2178
     (:document
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2179
       (make-document))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2180
     (:document-fragment
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2181
      (make-fragment (document*)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2182
     (:document-type
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2183
      (make-doctype (document*)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2184
                    (node-name node)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2185
                    (node-public-id node)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2186
                    (node-system-id node)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2187
     (:comment
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2188
       (make-comment (document*) (node-value node)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2189
     (:text
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2190
      (make-text-node (document*) (node-value node)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2191
     (:element
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2192
      (let ((clone (make-element (document*) (node-name node) (node-namespace node))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2193
        (element-map-attributes*
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2194
         (lambda (name namespace value)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2195
           (setf (element-attribute clone name namespace) value))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2196
         node)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2197
        clone))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2198
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2199
 (defun node-name-tuple (node)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2200
   (cons (or (node-namespace node)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2201
             (find-namespace "html"))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2202
         (node-name node)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2203
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2204
 (defun node-name-tuple-values (node)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2205
   (values (or (node-namespace node)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2206
               (find-namespace "html"))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2207
           (node-name node)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2208
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2209
 (defun node-has-content (node)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2210
   (not (null (node-first-child node))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2211
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2212
 (defun node-attributes= (node1 node2)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2213
   (labels ((has-all-attributes-of (node1 node2)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2214
              (element-map-attributes*
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2215
               (lambda (name namespace value)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2216
                 (unless (equal value
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2217
                                (element-attribute node2 name namespace))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2218
                   (return-from has-all-attributes-of nil)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2219
               node1)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2220
              t))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2221
     (and (has-all-attributes-of node1 node2)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2222
          (has-all-attributes-of node2 node1))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2223
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2224
 (defun node-append-child* (node child)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2225
   (let ((last-child (node-last-child node)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2226
     (if (and (eql :text (node-type child))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2227
              last-child
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2228
              (eql :text (node-type last-child)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2229
         (nconcatf (node-value last-child)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2230
                   (node-value child))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2231
         (node-append-child node child))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2232
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2233
 (defun node-insert-before* (node child insert-before)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2234
   (when (eql :text (node-type child))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2235
     (let ((prev-child (node-previous-sibling insert-before)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2236
       (when (and prev-child
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2237
                  (eql :text (node-type prev-child)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2238
         (node-remove-child node prev-child)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2239
         (setf child (make-text-node
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2240
                      (document*)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2241
                      (concatenate 'string
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2242
                                   (node-value prev-child)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2243
                                   (node-value child)))))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2244
   (node-insert-before node child insert-before))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2245
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2246
 (defun node-reparent-children (node new-parent)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2247
   (element-map-children (lambda (child)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2248
                           (node-append-child new-parent child))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2249
                         node))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2250
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2251
 (defun node-insert-text (node data &optional insert-before)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2252
   (if insert-before
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2253
       (node-insert-before* node (make-text-node (document*) data) insert-before)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2254
       (node-append-child* node (make-text-node (document*) data))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2255
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2256
 (defun last-open-element ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2257
   (with-slots (open-elements) *parser*
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2258
     (car (last open-elements))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2259
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2260
 (defun create-element (token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2261
   "Create an element but don't insert it anywhere"
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2262
   (with-slots (html-namespace) *parser*
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2263
     (let ((element (make-element (document*)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2264
                                  (getf token :name)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2265
                                  (or (getf token :namespace)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2266
                                      html-namespace))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2267
       (loop for (name . value) in (getf token :data)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2268
             do (if (consp name)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2269
                    (setf (element-attribute element (second name) (third name)) value)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2270
                    (setf (element-attribute element name) value)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2271
       element)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2272
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2273
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2274
 (defun insert-root (token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2275
   (with-slots (open-elements) *parser*
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2276
     (let ((element (create-element token)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2277
       (assert element)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2278
       (push-end element open-elements)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2279
       (node-append-child (document*) element))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2280
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2281
 (defun insert-doctype (token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2282
   (node-append-child (document*)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2283
                      (make-doctype (document*)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2284
                                    (getf token :name)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2285
                                    (getf token :public-id)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2286
                                    (getf token :system-id))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2287
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2288
 (defun insert-comment (token &optional parent)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2289
   (with-slots (open-elements) *parser*
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2290
     (unless parent
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2291
       (setf parent (car (last open-elements))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2292
     (node-append-child parent (make-comment (document*) (getf token :data)))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2293
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2294
 (defun insert-element-normal (token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2295
   (with-slots (open-elements) *parser*
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2296
    (let ((element (create-element token)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2297
      (node-append-child (last-open-element) element)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2298
      (push-end element open-elements)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2299
      element)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2300
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2301
 (defun insert-element-table (token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2302
   (with-slots (open-elements) *parser*
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2303
     (if (not (member (node-name (last-open-element))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2304
                      +table-insert-mode-elements+ :test #'string=))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2305
         (insert-element-normal token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2306
         (let ((element (create-element token)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2307
           ;; We should be in the InTable mode. This means we want to do
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2308
           ;; special magic element rearranging
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2309
           (multiple-value-bind (parent insert-before)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2310
               (get-table-misnested-nodeposition)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2311
             (if (not insert-before)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2312
                 (node-append-child* parent element)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2313
                 (node-insert-before* parent element insert-before))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2314
             (push-end element open-elements))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2315
           element))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2316
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2317
 (defun insert-element (token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2318
   (with-slots (insert-from-table) *parser*
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2319
     (if insert-from-table
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2320
         (insert-element-table token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2321
         (insert-element-normal token))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2322
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2323
 (defun parser-insert-text (data &optional parent)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2324
   "Insert text data."
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2325
   (with-slots (open-elements insert-from-table) *parser*
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2326
     (unless parent
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2327
       (setf parent (car (last open-elements))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2328
     (cond ((or (not insert-from-table)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2329
                (and insert-from-table
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2330
                     (not (member (node-name (last-open-element))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2331
                                  +table-insert-mode-elements+ :test #'string=))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2332
            (node-insert-text parent data))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2333
           (t
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2334
            ;; We should be in the InTable mode. This means we want to do
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2335
            ;; special magic element rearranging
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2336
            (multiple-value-bind (parent insert-before)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2337
                (get-table-misnested-nodeposition)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2338
              (node-insert-text parent data insert-before))))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2339
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2340
 (defun get-table-misnested-nodeposition ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2341
   "Get the foster parent element, and sibling to insert before
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2342
     (or None) when inserting a misnested table node"
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2343
   (with-slots (open-elements) *parser*
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2344
     ;; The foster parent element is the one which comes before the most
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2345
     ;; recently opened table element
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2346
     (let ((last-table (find "table" open-elements :key #'node-name :test #'string= :from-end t))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2347
           (foster-parent nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2348
           (insert-before nil))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2349
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2350
       (cond (last-table
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2351
               ;; XXX - we should really check that this parent is actually a
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2352
               ;; node here
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2353
               (if (node-parent last-table)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2354
                   (setf foster-parent (node-parent last-table)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2355
                         insert-before last-table)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2356
                   (setf foster-parent (elt open-elements (1- (position last-table open-elements))))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2357
              (t
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2358
               (setf foster-parent (first open-elements))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2359
       (values foster-parent insert-before))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2360
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2361
 (defun generate-implied-end-tags (&optional exclude)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2362
   (with-slots (open-elements) *parser*
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2363
     (let ((name (node-name (last-open-element))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2364
       ;; XXX td, th and tr are not actually needed
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2365
       (when (and (member name '("dd" "dt" "li" "option" "optgroup" "p" "rp" "rt") :test #'string=)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2366
                  (not (equal name exclude)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2367
         (pop-end open-elements)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2368
         ;; XXX This is not entirely what the specification says. We should
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2369
         ;; investigate it more closely.
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2370
         (generate-implied-end-tags exclude)))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2371
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2372
 (defun reconstruct-active-formatting-elements ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2373
   ;; Within this algorithm the order of steps described in the
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2374
   ;; specification is not quite the same as the order of steps in the
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2375
   ;; code. It should still do the same though.
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2376
   (with-slots (active-formatting-elements open-elements) *parser*
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2377
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2378
     ;; Step 1: stop the algorithm when there's nothing to do.
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2379
     (unless active-formatting-elements
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2380
       (return-from reconstruct-active-formatting-elements))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2381
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2382
     ;; Step 2 and step 3: we start with the last element. So i is -1.
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2383
     (let* ((i (1- (length active-formatting-elements)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2384
            (entry (elt active-formatting-elements i)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2385
       (when (or (eql entry :marker)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2386
                 (member entry open-elements))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2387
         (return-from reconstruct-active-formatting-elements))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2388
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2389
       ;; Step 6
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2390
       (loop while (and (not (eql entry :marker))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2391
                        (not (member entry open-elements))) do
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2392
            (when (zerop i)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2393
              ;; This will be reset to 0 below
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2394
              (setf i -1)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2395
              (return))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2396
            (decf i)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2397
          ;; Step 5: let entry be one earlier in the list.
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2398
            (setf entry (elt active-formatting-elements i)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2399
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2400
       (loop
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2401
          ;; Step 7
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2402
          (incf i)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2403
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2404
          ;; Step 8
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2405
          (setf entry (elt active-formatting-elements i))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2406
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2407
          ;; Step 9
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2408
          (let* ((element (insert-element (list :type :start-tag
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2409
                                                :name (node-name entry)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2410
                                                :namespace (node-namespace entry)))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2411
            (element-map-attributes* (lambda (name namespace value)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2412
                                       (setf (element-attribute element name namespace) value))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2413
                                     entry)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2414
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2415
            ;; Step 10
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2416
            (setf (elt active-formatting-elements i) element)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2417
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2418
            ;; Step 11
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2419
            (when (eql element (car (last active-formatting-elements)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2420
              (return)))))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2421
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2422
 (defun clear-active-formatting-elements ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2423
  (with-slots (active-formatting-elements) *parser*
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2424
    (loop for entry = (pop-end active-formatting-elements)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2425
       while (and active-formatting-elements
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2426
                  (not (eql entry :marker))))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2427
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2428
 (defun element-in-active-formatting-elements (name)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2429
   "Check if an element exists between the end of the active
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2430
    formatting elements and the last marker. If it does, return it, else
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2431
    return false"
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2432
   (with-slots (active-formatting-elements) *parser*
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2433
     (loop for item in (reverse active-formatting-elements) do
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2434
        ;; Check for Marker first because if it's a Marker it doesn't have a
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2435
        ;; name attribute.
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2436
          (when (eql item :marker)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2437
            (return nil))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2438
          (when (string= (node-name item) name)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2439
            (return item)))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2440
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2441
 (defun scope-tree ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2442
   (load-time-value
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2443
    (flet ((unflatten (alist)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2444
             "Turn an alist into a tree."
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2445
             (let ((alist2
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2446
                     (mapcar #'list
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2447
                             (remove-duplicates (mapcar #'car alist)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2448
                                                :test #'equal))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2449
               (loop for (key . value) in alist
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2450
                     do (push value (cdr (assoc key alist2
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2451
                                                :test #'equal))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2452
               ;; Put the XHTML ns first.
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2453
               (sort alist2 #'<
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2454
                     :key (lambda (pair)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2455
                            (position (car pair)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2456
                                      '("http://www.w3.org/1999/xhtml"
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2457
                                        "http://www.w3.org/2000/svg"
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2458
                                        "http://www.w3.org/1998/Math/MathML")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2459
                                      :test #'string=))))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2460
      (let ((html (find-namespace "html")))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2461
        `((nil . ,(unflatten +scoping-elements+))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2462
          ("button" . ,(unflatten
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2463
                        `(,@+scoping-elements+
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2464
                          (,html . "button"))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2465
          ("list" . ,(unflatten
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2466
                      `(,@+scoping-elements+
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2467
                        (,html . "ol")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2468
                        (,html . "ul"))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2469
          ("table" . ((,html "html" "table")))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2470
          ("select" . ((,html "optgroup" "option"))))))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2471
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2472
 (defun element-in-scope (target &optional variant)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2473
   (let ((list-elements
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2474
           (cdr (assoc variant (scope-tree) :test #'equal)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2475
         (invert (equal "select" variant)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2476
     (dolist (node (reverse (slot-value *parser* 'open-elements)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2477
       (when (or (and (stringp target)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2478
                      (string= (node-name node) target))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2479
                 (eql node target))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2480
         (return-from element-in-scope t))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2481
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2482
       (multiple-value-bind (ns name)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2483
           (node-name-tuple-values node)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2484
         (let ((found (member name (cdr (assoc ns list-elements :test #'string=))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2485
                              :test #'string=)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2486
           (when invert
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2487
             (setf found (not found)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2488
           (when found
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2489
             (return-from element-in-scope nil)))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2490
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2491
     (error "We should never reach this point")))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2492
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2493
 ;;; Parser
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2494
 ;; external interface
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2495
 (defun parse-html5 (source &key encoding strictp container dom)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2496
   (parse-html5-from-source source
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2497
                            :encoding encoding
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2498
                            :strictp strictp
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2499
                            :container container
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2500
                            :dom dom))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2501
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2502
 (defun parse-html5-fragment (source &key encoding strictp (container "div") dom)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2503
   (parse-html5-from-source source
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2504
                            :encoding encoding
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2505
                            :strictp strictp
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2506
                            :container container
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2507
                            :dom dom))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2508
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2509
 (defgeneric transform-html5-dom (to-type node &key)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2510
   (:method ((to-type cons) node &key)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2511
     (apply #'transform-html5-dom (car to-type) node (cdr to-type)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2512
   (:method (to-type node &key &allow-other-keys)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2513
     (error "No TRANSFORM-HTML5-DOM method defined for dom type ~S." to-type)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2514
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2515
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2516
 ;; internal
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2517
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2518
 (defun parse-html5-from-source (source &key container encoding strictp dom)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2519
   (let ((*parser* (make-instance 'html-parser
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2520
                                  :strict strictp)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2521
     (parser-parse source
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2522
                   :fragment-p container
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2523
                   :encoding encoding)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2524
     (with-slots (open-elements errors) *parser*
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2525
       (let ((document
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2526
              (if container
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2527
                  (let ((fragment (make-fragment (document*))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2528
                    (node-reparent-children (first open-elements) fragment)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2529
                    fragment)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2530
                  (document*))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2531
         (values (if dom
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2532
                     (transform-html5-dom dom document)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2533
                     document)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2534
                 (reverse errors))))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2535
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2536
 (defvar *phase*)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2537
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2538
 (defun ascii-ichar= (char1 char2)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2539
   "ASCII case-insensitive char="
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2540
   (or (char= char1 char2)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2541
       (and (or (char<= #\A char1 #\Z)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2542
                (char<= #\A char2 #\Z))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2543
            (char= (char-downcase char1)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2544
                   (char-downcase char2)))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2545
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2546
 (defun ascii-istring= (string1 string2)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2547
   "ASCII case-insensitive string="
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2548
   (every #'ascii-ichar= string1 string2))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2549
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2550
 (defun cdata-switch-helper ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2551
   (and (last-open-element)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2552
        (not (equal (node-namespace (last-open-element))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2553
                    (slot-value *parser* 'html-namespace)))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2554
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2555
 (defun parser-parse (source &key fragment-p encoding)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2556
   (with-slots (inner-html-mode container tokenizer)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2557
       *parser*
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2558
     (setf inner-html-mode fragment-p)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2559
     (when (stringp fragment-p)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2560
       (setf container fragment-p))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2561
     (setf tokenizer (make-html-tokenizer source
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2562
                                          :encoding encoding
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2563
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2564
                                          :cdata-switch-helper #'cdata-switch-helper))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2565
     (parser-reset)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2566
     (loop
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2567
           ;; The input stream will throw please-reparse with result true
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2568
           ;; if the encoding is changed
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2569
           while (catch 'please-reparse
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2570
                   (main-loop)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2571
                   nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2572
           do (parser-reset))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2573
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2574
 (defun parser-reset ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2575
   (with-slots (open-elements active-formatting-elements
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2576
                     head-pointer form-pointer insert-from-table
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2577
                     first-start-tag errors compat-mode inner-html-mode
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2578
                     inner-html container tokenizer phase last-phase
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2579
                     before-rcdata-phase frameset-ok
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2580
                     html-namespace)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2581
       *parser*
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2582
     (setf open-elements '())
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2583
     (setf active-formatting-elements '())
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2584
     (setf head-pointer nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2585
     (setf form-pointer nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2586
     (setf insert-from-table nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2587
     (setf first-start-tag nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2588
     (setf errors '())
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2589
     (setf compat-mode :no-quirks)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2590
     (cond (inner-html-mode
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2591
            (setf inner-html (string-downcase container))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2592
            (cond ((member inner-html +cdata-elements+ :test #'string=)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2593
                   (setf (slot-value tokenizer 'state) :rcdata-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2594
                  ((member inner-html +rcdata-elements+ :test #'string=)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2595
                   (setf (slot-value tokenizer 'state) :rawtext-state))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2596
                  ((string= inner-html "plaintext")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2597
                   (setf (slot-value tokenizer 'state) :plaintext-state)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2598
            (insert-root (implied-tag-token "html" :start-tag))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2599
            (setf phase :before-head)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2600
            (reset-insertion-mode))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2601
           (t
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2602
            (setf inner-html nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2603
            (setf phase :initial)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2604
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2605
     (setf last-phase nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2606
     (setf before-rcdata-phase nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2607
     (setf frameset-ok t)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2608
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2609
 (defun is-html-integration-point (element)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2610
   (if (and (string= (node-name element) "annotation-xml")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2611
            (string= (node-namespace element) (find-namespace "mathml")))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2612
       (and (element-attribute element "encoding")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2613
            (member (ascii-upper-2-lower (element-attribute element "encoding"))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2614
                    '("text/html" "application/xhtml+xml")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2615
                    :test #'string=))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2616
       (member (node-name-tuple element)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2617
               +html-integration-point-elements+
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2618
               :test #'equal)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2619
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2620
 (defun is-math-ml-text-integration-point (element)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2621
   (member (node-name-tuple element)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2622
           +mathml-text-integration-point-elements+
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2623
           :test #'equal))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2624
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2625
 (defun main-loop ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2626
   (with-slots (tokenizer phase)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2627
       *parser*
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2628
     (map-tokens tokenizer (lambda (token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2629
                             (process-token (normalize-token token))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2630
     (loop with reprocess = t
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2631
        with phases = '()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2632
        while reprocess do
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2633
          (push phase phases)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2634
          (setf reprocess (process-eof nil :phase phase))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2635
          (when reprocess
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2636
            (assert (not (member phase phases)))))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2637
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2638
 (defun process-token (token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2639
   (with-slots (tokenizer last-open-element html-namespace)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2640
       *parser*
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2641
     (let ((new-token token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2642
           (type))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2643
       (loop while new-token do
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2644
            (let* ((current-node (last-open-element))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2645
                   (current-node-namespace (if current-node (node-namespace current-node)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2646
                   (current-node-name (if current-node (node-name current-node))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2647
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2648
              (setf type (getf new-token :type))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2649
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2650
              (cond ((eql type :parse-error)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2651
                     (parser-parse-error (getf token :data) (getf token :datavars))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2652
                     (setf new-token nil))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2653
                    (t
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2654
                     (let (phase)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2655
                       (if (or (null (slot-value *parser* 'open-elements))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2656
                               (equal current-node-namespace html-namespace)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2657
                               (and (is-math-ml-text-integration-point current-node)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2658
                                    (or (and (eql type :start-tag)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2659
                                             (not (member (getf token :name) '("mglyph" "malignmark") :test #'string=)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2660
                                        (eql type :characters)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2661
                                        (eql type :space-characters)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2662
                               (and (equal current-node-namespace (find-namespace "mathml"))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2663
                                    (equal current-node-name "annotation-xml")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2664
                                    (eql type :start-tag)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2665
                                    (equal (getf token :name) "svg"))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2666
                               (and (is-html-integration-point current-node)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2667
                                    (member type '(:start-tag :characters :space-characters))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2668
                           (setf phase (slot-value *parser* 'phase))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2669
                           (setf phase :in-foreign-content))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2670
                       ;(format t "~&phase ~S token ~S~%" phase new-token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2671
                       (setf new-token
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2672
                             (ecase type
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2673
                               (:characters
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2674
                                (process-characters new-token :phase phase))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2675
                               (:space-characters
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2676
                                (process-space-characters new-token :phase phase))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2677
                               (:start-tag
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2678
                                (process-start-tag new-token :phase phase))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2679
                               (:end-tag
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2680
                                (process-end-tag new-token :phase phase))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2681
                               (:comment
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2682
                                (process-comment new-token :phase phase))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2683
                               (:doctype
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2684
                                (process-doctype new-token :phase phase))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2685
                       ;(format t "   phase returned ~S new-token ~S~%" phase new-token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2686
                                             ))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2687
            (when (and (eql type :start-tag)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2688
                       (getf token :self-closing)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2689
                       (not (getf token :self-closing-acknowledged)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2690
              (parser-parse-error :non-void-element-with-trailing-solidus
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2691
                                  `(:name ,(getf token :name))))))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2692
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2693
 (defun parser-parse-error (error-code &optional datavars)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2694
   (with-slots (errors) *parser*
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2695
     (push (list error-code datavars) errors)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2696
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2697
 ;; TODO rename to a longer and more descriptive name when we are done writing the code
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2698
 (defun perror (error-code &rest datavars)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2699
   (parser-parse-error error-code datavars))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2700
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2701
 (defun normalize-token (token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2702
   (when (getf token :start-tag)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2703
     ;; Remove duplicate attributes
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2704
     (setf (getf token :data) (remove-duplicates (getf token :data)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2705
                                                 :key #'car
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2706
                                                 :test #'string=
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2707
                                                 :from-end t)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2708
   token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2709
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2710
 (defun adjust-attributes (token replacements)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2711
   (setf (getf token :data)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2712
         (loop for (name . value) in (getf token :data)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2713
            collect (cons (or (cdr (assoc name replacements :test #'string=))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2714
                              name)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2715
                          value))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2716
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2717
 (defun adjust-math-ml-attributes (token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2718
   (adjust-attributes token '(("definitionurl" ."definitionURL"))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2719
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2720
 (defun adjust-svg-attributes (token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2721
   (adjust-attributes token '(("attributename" . "attributeName")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2722
                              ("attributetype" . "attributeType")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2723
                              ("basefrequency" . "baseFrequency")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2724
                              ("baseprofile" . "baseProfile")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2725
                              ("calcmode" . "calcMode")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2726
                              ("clippathunits" . "clipPathUnits")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2727
                              ("contentscripttype" . "contentScriptType")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2728
                              ("contentstyletype" . "contentStyleType")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2729
                              ("diffuseconstant" . "diffuseConstant")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2730
                              ("edgemode" . "edgeMode")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2731
                              ("externalresourcesrequired" . "externalResourcesRequired")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2732
                              ("filterres" . "filterRes")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2733
                              ("filterunits" . "filterUnits")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2734
                              ("glyphref" . "glyphRef")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2735
                              ("gradienttransform" . "gradientTransform")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2736
                              ("gradientunits" . "gradientUnits")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2737
                              ("kernelmatrix" . "kernelMatrix")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2738
                              ("kernelunitlength" . "kernelUnitLength")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2739
                              ("keypoints" . "keyPoints")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2740
                              ("keysplines" . "keySplines")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2741
                              ("keytimes" . "keyTimes")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2742
                              ("lengthadjust" . "lengthAdjust")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2743
                              ("limitingconeangle" . "limitingConeAngle")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2744
                              ("markerheight" . "markerHeight")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2745
                              ("markerunits" . "markerUnits")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2746
                              ("markerwidth" . "markerWidth")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2747
                              ("maskcontentunits" . "maskContentUnits")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2748
                              ("maskunits" . "maskUnits")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2749
                              ("numoctaves" . "numOctaves")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2750
                              ("pathlength" . "pathLength")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2751
                              ("patterncontentunits" . "patternContentUnits")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2752
                              ("patterntransform" . "patternTransform")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2753
                              ("patternunits" . "patternUnits")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2754
                              ("pointsatx" . "pointsAtX")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2755
                              ("pointsaty" . "pointsAtY")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2756
                              ("pointsatz" . "pointsAtZ")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2757
                              ("preservealpha" . "preserveAlpha")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2758
                              ("preserveaspectratio" . "preserveAspectRatio")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2759
                              ("primitiveunits" . "primitiveUnits")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2760
                              ("refx" . "refX")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2761
                              ("refy" . "refY")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2762
                              ("repeatcount" . "repeatCount")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2763
                              ("repeatdur" . "repeatDur")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2764
                              ("requiredextensions" . "requiredExtensions")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2765
                              ("requiredfeatures" . "requiredFeatures")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2766
                              ("specularconstant" . "specularConstant")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2767
                              ("specularexponent" . "specularExponent")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2768
                              ("spreadmethod" . "spreadMethod")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2769
                              ("startoffset" . "startOffset")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2770
                              ("stddeviation" . "stdDeviation")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2771
                              ("stitchtiles" . "stitchTiles")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2772
                              ("surfacescale" . "surfaceScale")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2773
                              ("systemlanguage" . "systemLanguage")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2774
                              ("tablevalues" . "tableValues")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2775
                              ("targetx" . "targetX")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2776
                              ("targety" . "targetY")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2777
                              ("textlength" . "textLength")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2778
                              ("viewbox" . "viewBox")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2779
                              ("viewtarget" . "viewTarget")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2780
                              ("xchannelselector" . "xChannelSelector")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2781
                              ("ychannelselector" . "yChannelSelector")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2782
                              ("zoomandpan" . "zoomAndPan"))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2783
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2784
 (defun adjust-foreign-attributes (token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2785
   (adjust-attributes token `(("xlink:actuate" . ("xlink" "actuate" ,(find-namespace "xlink")))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2786
                              ("xlink:arcrole" . ("xlink" "arcrole" ,(find-namespace "xlink")))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2787
                              ("xlink:href" . ("xlink" "href" ,(find-namespace "xlink")))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2788
                              ("xlink:role" . ("xlink" "role" ,(find-namespace "xlink")))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2789
                              ("xlink:show" . ("xlink" "show" ,(find-namespace "xlink")))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2790
                              ("xlink:title" . ("xlink" "title" ,(find-namespace "xlink")))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2791
                              ("xlink:type" . ("xlink" "type" ,(find-namespace "xlink")))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2792
                              ("xml:base" . ("xml" "base" ,(find-namespace "xml")))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2793
                              ("xml:lang" . ("xml" "lang" ,(find-namespace "xml")))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2794
                              ("xml:space" . ("xml" "space" ,(find-namespace "xml")))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2795
                              ("xmlns" . (nil "xmlns" ,(find-namespace "xmlns")))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2796
                              ("xmlns:xlink" . ("xmlns" "xlink" ,(find-namespace "xmlns"))))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2797
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2798
 (defun reset-insertion-mode ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2799
   (with-slots (inner-html html-namespace phase open-elements) *parser*
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2800
     (let ((last nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2801
           (new-phase nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2802
           (new-modes '(("select" . :in-select)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2803
                        ("td" . :in-cell)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2804
                        ("th" . :in-cell)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2805
                        ("tr" . :in-row)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2806
                        ("tbody" . :in-table-body)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2807
                        ("thead" . :in-table-body)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2808
                        ("tfoot" . :in-table-body)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2809
                        ("caption" . :in-caption)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2810
                        ("colgroup" . :in-column-group)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2811
                        ("table" . :in-table)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2812
                        ("head" . :in-body)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2813
                        ("body" . :in-body)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2814
                        ("frameset" . :in-frameset)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2815
                        ("html" . :before-head))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2816
       (loop for node in (reverse open-elements)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2817
          for node-name = (node-name node)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2818
          do
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2819
          (when (eql node (first open-elements))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2820
            (assert inner-html)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2821
            (setf last t)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2822
            (setf node-name inner-html))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2823
          ;; Check for conditions that should only happen in the innerHTML
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2824
          ;; case
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2825
          (when (member node-name '("select" "colgroup" "head" "html") :test #'string=)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2826
            (assert inner-html))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2827
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2828
          (unless (and (not last)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2829
                       (string/= (node-namespace node) html-namespace))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2830
            (let ((match (cdr (assoc node-name new-modes :test #'string=))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2831
              (when match
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2832
                (setf new-phase match)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2833
                (return))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2834
              (when last
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2835
                (setf new-phase :in-body)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2836
                (return)))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2837
       (setf phase new-phase))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2838
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2839
 (defun parse-rc-data-raw-text (token content-type)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2840
   (assert (member content-type '(:rawtext :rcdata)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2841
   (with-slots (tokenizer original-phase phase) *parser*
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2842
     (insert-element token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2843
     (setf (tokenizer-state tokenizer) (ecase content-type
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2844
                                         (:rawtext :rawtext-state)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2845
                                         (:rcdata :rcdata-state)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2846
     (setf original-phase phase)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2847
     (setf phase :text)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2848
     nil))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2849
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2850
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2851
 ;; Phases --------------------------------------------------------------------
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2852
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2853
 (defun implied-tag-token (name &optional (type :end-tag))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2854
   (list :type type :name name :data '() :self-closing nil))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2855
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2856
 (defun implied-tag-token/full (name type
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2857
                                &key (attributes '()) (self-closing nil))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2858
   (list :type type :name name :data attributes :self-closing self-closing))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2859
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2860
 (eval-when (:compile-toplevel :execute)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2861
   (defun phase-process-method-name (function-name)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2862
     (intern (concatenate 'string
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2863
                          "%"
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2864
                          (symbol-name function-name))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2865
             (symbol-package function-name))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2866
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2867
 (defvar *phase-indent* 0)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2868
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2869
 (defun call-phase-method (name phase token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2870
   ;(format *trace-output* "~&~vTcall: ~S ~S ~S" *phase-indent* name phase token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2871
   ;(break)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2872
   (let ((result (let ((*phase-indent* (+ 4 *phase-indent*)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2873
                   (funcall name phase token))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2874
     ;(format *trace-output* "~&~vTreturn: ~S ~S" *phase-indent* name result)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2875
     result))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2876
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2877
 (defmacro define-phase-process-functions (&body defs)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2878
   `(progn
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2879
      ,@(loop for function-name in defs
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2880
           for method-name = (phase-process-method-name function-name)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2881
           collect `(defgeneric ,method-name (phase token))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2882
           collect `(defun ,function-name (token &key (phase *phase*))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2883
                      (call-phase-method #',method-name phase token)))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2884
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2885
 (define-phase-process-functions
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2886
   add-formatting-element
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2887
   end-tag-applet-marquee-object
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2888
   end-tag-block
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2889
   end-tag-body
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2890
   end-tag-br
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2891
   end-tag-caption
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2892
   end-tag-col
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2893
   end-tag-colgroup
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2894
   end-tag-form
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2895
   end-tag-formatting
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2896
   end-tag-frameset
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2897
   end-tag-head
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2898
   end-tag-heading
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2899
   end-tag-html
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2900
   end-tag-html-body-br
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2901
   end-tag-ignore
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2902
   end-tag-imply
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2903
   end-tag-imply-head
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2904
   end-tag-list-item
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2905
   end-tag-optgroup
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2906
   end-tag-option
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2907
   end-tag-other
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2908
   end-tag-p
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2909
   end-tag-script
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2910
   end-tag-select
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2911
   end-tag-table
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2912
   end-tag-table-cell
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2913
   end-tag-table-row-group
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2914
   end-tag-tr
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2915
   insert-text
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2916
   process-characters
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2917
   process-comment
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2918
   process-doctype
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2919
   process-end-tag
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2920
   process-eof
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2921
   process-space-characters
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2922
   process-start-tag
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2923
   start-tag-a
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2924
   start-tag-applet-marquee-object
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2925
   start-tag-base-link-command
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2926
   start-tag-body
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2927
   start-tag-button
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2928
   start-tag-caption
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2929
   start-tag-close-p
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2930
   start-tag-col
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2931
   start-tag-colgroup
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2932
   start-tag-form
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2933
   start-tag-formatting
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2934
   start-tag-frame
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2935
   start-tag-frameset
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2936
   start-tag-from-head
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2937
   start-tag-head
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2938
   start-tag-heading
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2939
   start-tag-hr
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2940
   start-tag-html
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2941
   start-tag-i-frame
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2942
   start-tag-image
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2943
   start-tag-imply-tbody
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2944
   start-tag-input
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2945
   start-tag-is-index
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2946
   start-tag-list-item
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2947
   start-tag-math
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2948
   start-tag-meta
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2949
   start-tag-misplaced
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2950
   start-tag-no-script-no-frames-style
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2951
   start-tag-nobr
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2952
   start-tag-noframes
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2953
   start-tag-opt
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2954
   start-tag-optgroup
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2955
   start-tag-option
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2956
   start-tag-other
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2957
   start-tag-param-source
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2958
   start-tag-plaintext
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2959
   start-tag-pre-listing
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2960
   start-tag-process-in-head
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2961
   start-tag-rawtext
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2962
   start-tag-row-group
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2963
   start-tag-rp-rt
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2964
   start-tag-script
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2965
   start-tag-select
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2966
   start-tag-style-script
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2967
   start-tag-svg
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2968
   start-tag-table
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2969
   start-tag-table-cell
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2970
   start-tag-table-element
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2971
   start-tag-table-other
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2972
   start-tag-textarea
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2973
   start-tag-title
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2974
   start-tag-tr
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2975
   start-tag-void-formatting
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2976
   start-tag-xmp)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2977
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2978
 (defmacro def (phase name (&rest slots) &body body)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2979
   `(defmethod ,(phase-process-method-name name) ((*phase* (eql ,phase)) token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2980
      (with-slots (,@slots) *parser*
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2981
        ,@body)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2982
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2983
 (defmacro tagname-dispatch (phase name &body cases)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2984
   `(def ,phase ,name ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2985
      (let ((tagname (getf token :name)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2986
        (declare (ignorable tagname))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2987
        ,(let* ((default '(error "Unhandled tag ~S" tagname))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2988
                (string-cases
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2989
                  (loop for (tagnames function) in cases
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2990
                        append (cond ((stringp tagnames)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2991
                                      `((,tagnames (,function token))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2992
                                     ((consp tagnames)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2993
                                      (loop for tag in tagnames
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2994
                                            collect `(,tag (,function token))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2995
                                     ((eql 'default tagnames)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2996
                                      (setf default `(,function token))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2997
                                      nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2998
                                     (t (error "Invalid tag name clause ~S" tagnames))))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2999
           (if (not string-cases)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3000
               default
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3001
               `(string-case
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3002
                    (tagname :default ,default)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3003
                  ,@string-cases))))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3004
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3005
 ;; Default methods
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3006
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3007
 (defmethod %process-comment (*phase* token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3008
   ;; For most phases the following is correct. Where it's not it will be
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3009
   ;; overridden.
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3010
   (insert-comment token (last-open-element))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3011
   nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3012
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3013
 (defmethod %process-doctype (*phase* token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3014
   (parser-parse-error :unexpected-doctype)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3015
   nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3016
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3017
 (defmethod %process-characters (*phase* token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3018
   (parser-insert-text (getf token :data))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3019
   nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3020
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3021
 (defmethod %process-space-characters (*phase* token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3022
   (parser-insert-text (getf token :data))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3023
   nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3024
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3025
 (defmethod %start-tag-html (*phase* token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3026
   (with-slots (first-start-tag open-elements)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3027
       *parser*
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3028
     (when (and (not first-start-tag)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3029
                (string= (getf token :name) "html"))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3030
       (parser-parse-error :non-html-root))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3031
     ;; XXX Need a check here to see if the first start tag token emitted is
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3032
     ;; this token... If it's not, invoke self.parser.parseError().
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3033
     (let ((root-element (first open-elements)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3034
       (loop for (name . value) in (getf token :data)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3035
             do (unless (element-attribute root-element name)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3036
                  (setf (element-attribute root-element name) value))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3037
     (setf first-start-tag nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3038
     nil))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3039
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3040
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3041
 ;; InitialPhase
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3042
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3043
 (def :initial process-space-characters ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3044
   nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3045
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3046
 (def :initial process-comment ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3047
   (insert-comment token (document*))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3048
   nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3049
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3050
 (def :initial process-doctype (compat-mode phase)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3051
   (destructuring-bind (&key name public-id system-id correct &allow-other-keys)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3052
       token
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3053
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3054
     (when (or (string/= name "html")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3055
               public-id
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3056
               (and system-id (string/= system-id "about:legacy-compat")))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3057
       (parser-parse-error :unknown-doctype))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3058
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3059
     (unless public-id
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3060
       (setf public-id ""))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3061
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3062
     (insert-doctype token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3063
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3064
     (setf public-id (ascii-upper-2-lower public-id))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3065
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3066
     (cond ((or (not correct)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3067
                (string/= name "html")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3068
                (cl-ppcre:scan +quirks-mode-doctypes-regexp+ public-id)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3069
                (member public-id '("-//w3o//dtd w3 html strict 3.0//en//"
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3070
                                    "-/w3c/dtd html 4.0 transitional/en"
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3071
                                    "html")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3072
                        :test #'string=)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3073
                (and (not system-id)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3074
                     (cl-ppcre:scan '(:sequence :start-anchor (:alternation
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3075
                                                               "-//w3c//dtd html 4.01 frameset//"
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3076
                                                               "-//w3c//dtd html 4.01 transitional//"))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3077
                                    public-id))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3078
                (and system-id
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3079
                     (equal (ascii-upper-2-lower system-id)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3080
                            "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd")))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3081
            (setf compat-mode :quirks))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3082
           ((or (cl-ppcre:scan '(:sequence :start-anchor (:alternation
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3083
                                                          "-//w3c//dtd xhtml 1.0 frameset//"
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3084
                                                          "-//w3c//dtd xhtml 1.0 transitional//"))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3085
                               public-id)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3086
                (and system-id
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3087
                     (cl-ppcre:scan '(:sequence :start-anchor (:alternation
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3088
                                                               "-//w3c//dtd html 4.01 frameset//"
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3089
                                                               "-//w3c//dtd html 4.01 transitional//"))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3090
                                    public-id)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3091
            (setf compat-mode :limited-quirks)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3092
     (setf phase :before-html)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3093
     nil))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3094
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3095
 (flet ((anything-else ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3096
          (with-slots (compat-mode phase)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3097
              *parser*
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3098
            (setf compat-mode :quirks)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3099
            (setf phase :before-html))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3100
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3101
   (def :initial process-characters ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3102
     (parser-parse-error :expected-doctype-but-got-chars)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3103
     (anything-else)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3104
     token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3105
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3106
   (def :initial process-start-tag ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3107
     (parser-parse-error :expected-doctype-but-got-start-tag
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3108
                         (list :name (getf token :name)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3109
     (anything-else)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3110
     token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3111
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3112
   (def :initial process-end-tag ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3113
     (parser-parse-error :expected-doctype-but-got-end-tag
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3114
                         (list :name (getf token :name)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3115
     (anything-else)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3116
     token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3117
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3118
   (def :initial process-eof ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3119
     (parser-parse-error :expected-doctype-but-got-eof)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3120
     (anything-else)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3121
     t))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3122
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3123
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3124
 ;; BeforeHtmlPhase
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3125
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3126
 (flet ((insert-html-element ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3127
          (insert-root (implied-tag-token "html" :start-tag))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3128
          (setf (parser-phase *parser*) :before-head)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3129
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3130
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3131
   (def :before-html process-eof ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3132
     (insert-html-element)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3133
     t)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3134
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3135
   (def :before-html process-comment ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3136
     (insert-comment token (document*))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3137
     nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3138
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3139
   (def :before-html process-space-characters ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3140
     nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3141
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3142
   (def :before-html process-characters ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3143
     (insert-html-element)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3144
     token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3145
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3146
   (def :before-html process-start-tag (first-start-tag)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3147
     (when (string= (getf token :name) "html")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3148
       (setf first-start-tag t))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3149
     (insert-html-element)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3150
     token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3151
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3152
   (def :before-html process-end-tag ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3153
     (cond ((not (member (getf token :name) '("head" "body" "html" "br") :test #'string=))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3154
            (parser-parse-error :unexpected-end-tag-before-html `(:name ,(getf token :name)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3155
            nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3156
           (t
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3157
            (insert-html-element)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3158
            token))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3159
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3160
 ;; BeforeHeadPhase
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3161
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3162
 (tagname-dispatch :before-head process-start-tag
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3163
   ("html" start-tag-html)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3164
   ("head" start-tag-head token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3165
   (default start-tag-other))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3166
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3167
 (tagname-dispatch :before-head process-end-tag
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3168
   (("head" "body" "html" "br") end-tag-imply-head)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3169
   (default end-tag-other))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3170
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3171
 (def :before-head process-eof ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3172
   (start-tag-head (implied-tag-token "head" :start-tag))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3173
   t)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3174
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3175
 (def :before-head process-space-characters ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3176
   nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3177
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3178
 (def :before-head process-characters ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3179
   (start-tag-head (implied-tag-token "head" :start-tag))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3180
   token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3181
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3182
 (def :before-head start-tag-html ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3183
   (process-start-tag token :phase :in-body))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3184
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3185
 (def :before-head start-tag-head (head-pointer)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3186
   (insert-element token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3187
   (setf head-pointer (last-open-element))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3188
   (setf (parser-phase *parser*) :in-head)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3189
   nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3190
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3191
 (def :before-head start-tag-other ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3192
   (start-tag-head (implied-tag-token "head" :start-tag))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3193
   token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3194
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3195
 (def :before-head end-tag-imply-head ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3196
   (start-tag-head (implied-tag-token "head" :start-tag))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3197
   token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3198
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3199
 (def :before-head end-tag-other ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3200
   (parser-parse-error :end-tag-after-implied-root `(:name ,(getf token :name)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3201
   nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3202
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3203
 ;; InHeadPhase
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3204
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3205
 (tagname-dispatch :in-head process-start-tag
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3206
   ("html" start-tag-html)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3207
   ("title" start-tag-title)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3208
   (("noscript" "noframes" "style") start-tag-no-script-no-frames-style)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3209
   ("script" start-tag-script)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3210
   (("base" "basefont" "bgsound" "command" "link") start-tag-base-link-command)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3211
   ("meta" start-tag-meta)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3212
   ("head" start-tag-head)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3213
   (default start-tag-other))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3214
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3215
 (tagname-dispatch :in-head process-end-tag
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3216
   ("head" end-tag-head)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3217
   (("br" "html" "body") end-tag-html-body-br)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3218
   (default end-tag-other))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3219
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3220
 (flet ((anything-else ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3221
          (end-tag-head (implied-tag-token "head"))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3222
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3223
   ;; the real thing
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3224
   (def :in-head process-eof ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3225
     (anything-else)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3226
     t)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3227
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3228
   (def :in-head process-characters ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3229
     (anything-else)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3230
     token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3231
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3232
   (def :in-head start-tag-html ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3233
     (process-start-tag token :phase :in-body))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3234
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3235
   (def :in-head start-tag-head ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3236
     (parser-parse-error :two-heads-are-not-better-than-one)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3237
     nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3238
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3239
   (def :in-head start-tag-base-link-command (open-elements)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3240
     (insert-element token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3241
     (pop-end open-elements)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3242
     (setf (getf token :self-closing-acknowledged) t)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3243
     nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3244
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3245
   (defun parse-content-attr (string)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3246
     "The algorithm for extracting an encoding from a meta element"
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3247
     (let ((position 0))                 ; Step 1
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3248
       (labels ((char-at (index)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3249
                  (and (< position (length string))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3250
                       (char string index)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3251
                (skip-space ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3252
                  (loop while (member (char-at position) +space-characters+)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3253
                        do (incf position))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3254
         ;; Step 2
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3255
         (loop
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3256
           (setf position (search "charset" string :start2 position))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3257
           (unless position
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3258
             (return-from parse-content-attr))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3259
           ;; Set position to after charset
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3260
           (incf position 7)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3261
           ;; Step 3
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3262
           (skip-space)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3263
           ;; Step 4
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3264
           (when (eql (char-at position) #\=)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3265
             (return))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3266
           (decf position))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3267
         ;; Step 5
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3268
         (incf position)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3269
         (skip-space)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3270
         ;; Step 6
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3271
         (let ((next-char (char-at position)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3272
           (cond ((or (eql #\' next-char)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3273
                      (eql #\" next-char))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3274
                  (incf position)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3275
                  (let ((end (position next-char string :start position)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3276
                    (when end
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3277
                      (subseq string position end))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3278
                 (next-char
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3279
                  (let ((start position))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3280
                    (loop until (or (= position (length string))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3281
                                    (member (char-at position) +space-characters+))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3282
                          do (incf position))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3283
                    (subseq string start position))))))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3284
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3285
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3286
   (def :in-head start-tag-meta (tokenizer open-elements)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3287
     (insert-element token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3288
     (pop-end open-elements)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3289
     (setf (getf token :self-closing-acknowledged) t)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3290
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3291
     (let ((attributes (getf token :data)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3292
       (when (eql (cdr (html5-stream-encoding (tokenizer-stream tokenizer))) :tentative)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3293
         (cond ((assoc "charset" attributes :test #'string=)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3294
                (html5-stream-change-encoding (tokenizer-stream tokenizer)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3295
                                              (cdr (assoc "charset" attributes :test #'string=))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3296
               ((and (assoc "http-equiv" attributes :test #'string=)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3297
                     (ascii-istring= (cdr (assoc "http-equiv" attributes :test #'string=))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3298
                                     "Content-Type")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3299
                     (assoc "content" attributes :test #'string=))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3300
                (let* ((content (cdr (assoc "content" attributes :test #'string=)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3301
                       (new-encoding (parse-content-attr content)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3302
                  (if new-encoding
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3303
                      (html5-stream-change-encoding (tokenizer-stream tokenizer)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3304
                                                    new-encoding)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3305
                      (parser-parse-error :invalid-encoding-declaration
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3306
                                          `(:content ,content))))))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3307
     nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3308
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3309
   (def :in-head start-tag-title ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3310
     (parse-rc-data-raw-text token :rcdata)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3311
     nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3312
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3313
   (def :in-head start-tag-no-script-no-frames-style ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3314
     ;; Need to decide whether to implement the scripting-disabled case
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3315
     (parse-rc-data-raw-text token :rawtext))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3316
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3317
   (def :in-head start-tag-script (tokenizer original-phase phase)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3318
     (insert-element token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3319
     (setf (tokenizer-state tokenizer) :script-data-state)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3320
     (setf original-phase phase)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3321
     (setf phase :text)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3322
     nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3323
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3324
   (def :in-head start-tag-other ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3325
     (anything-else)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3326
     token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3327
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3328
   (def :in-head end-tag-head (phase open-elements)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3329
     (let ((node (pop-end open-elements)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3330
       (assert (string= (node-name node) "head") ()  "Expected head got ~S" (node-name node))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3331
       (setf phase :after-head)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3332
       nil))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3333
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3334
   (def :in-head end-tag-html-body-br ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3335
     (anything-else)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3336
     token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3337
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3338
   (def :in-head end-tag-other ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3339
     (parser-parse-error :unexpected-end-tag `(:name ,(getf token :name)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3340
     nil))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3341
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3342
 ;; XXX If we implement a parser for which scripting is disabled we need to
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3343
 ;; implement this phase.
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3344
 ;;
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3345
 ;; InHeadNoScriptPhase
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3346
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3347
 ;; AfterHeadPhase
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3348
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3349
 (tagname-dispatch :after-head process-start-tag
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3350
   ("html" start-tag-html)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3351
   ("body" start-tag-body)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3352
   ("frameset" start-tag-frameset)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3353
   (("base" "basefont" "bgsound" "link" "meta"
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3354
            "noframes" "script" "style" "title")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3355
    start-tag-from-head)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3356
   ("head" start-tag-head)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3357
   (default start-tag-other))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3358
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3359
 (tagname-dispatch :after-head process-end-tag
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3360
   (("body" "html" "br") end-tag-html-body-br)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3361
   (default end-tag-other))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3362
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3363
 (flet ((anything-else ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3364
          (with-slots (phase frameset-ok) *parser*
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3365
            (insert-element (implied-tag-token "body" :start-tag))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3366
            (setf phase :in-body)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3367
            (setf frameset-ok t))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3368
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3369
   (def :after-head process-eof ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3370
     (anything-else)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3371
     t)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3372
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3373
   (def :after-head process-characters ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3374
     (anything-else)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3375
     token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3376
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3377
   (def :after-head start-tag-html ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3378
     (process-start-tag token :phase :in-body))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3379
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3380
   (def :after-head start-tag-body (phase frameset-ok)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3381
     (setf frameset-ok nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3382
     (insert-element token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3383
     (setf phase :in-body)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3384
     nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3385
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3386
   (def :after-head start-tag-frameset (phase)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3387
     (insert-element token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3388
     (setf phase :in-frameset)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3389
     nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3390
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3391
   (def :after-head start-tag-from-head (head-pointer open-elements)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3392
     (parser-parse-error :unexpected-start-tag-out-of-my-head
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3393
                         `(:name ,(getf token :name)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3394
     (push-end head-pointer open-elements)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3395
     (process-start-tag token :phase :in-head)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3396
     (loop for node in (reverse open-elements)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3397
           do (when (string= "head" (node-name node))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3398
                (setf open-elements
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3399
                      (remove node open-elements :test #'equal))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3400
                (return)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3401
     nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3402
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3403
   (def :after-head start-tag-head ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3404
     (parser-parse-error :unexpected-start-tag
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3405
                         `(:name ,(getf token :name)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3406
     nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3407
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3408
   (def :after-head start-tag-other ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3409
     (anything-else)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3410
     token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3411
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3412
   (def :after-head end-tag-html-body-br ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3413
     (anything-else)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3414
     token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3415
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3416
   (def :after-head end-tag-other ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3417
     (parser-parse-error :unexpected-end-tag
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3418
                         `(:name ,(getf token :name)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3419
     nil))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3420
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3421
 ;; InBodyPhase
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3422
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3423
 (tagname-dispatch :in-body process-start-tag
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3424
   ("html" start-tag-html)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3425
   (("base" "basefont" "bgsound" "command" "link"
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3426
            "meta" "noframes" "script" "style" "title")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3427
    start-tag-process-in-head)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3428
   ("body" start-tag-body)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3429
   ("frameset" start-tag-frameset)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3430
   (("address" "article" "aside" "blockquote" "center" "details"
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3431
               "dir" "div" "dl" "fieldset" "figcaption" "figure"
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3432
               "footer" "header" "hgroup" "menu" "nav" "ol" "p"
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3433
               "section" "summary" "ul")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3434
    start-tag-close-p)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3435
   (#.+heading-elements+ start-tag-heading)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3436
   (("pre" "listing") start-tag-pre-listing)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3437
   ("form" start-tag-form)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3438
   (("li" "dd" "dt") start-tag-list-item)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3439
   ("plaintext" start-tag-plaintext)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3440
   ("a" start-tag-a)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3441
   (("b" "big" "code" "em" "font" "i" "s" "small" "strike"
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3442
         "strong" "tt" "u")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3443
    start-tag-formatting)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3444
   ("nobr" start-tag-nobr)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3445
   ("button" start-tag-button)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3446
   (("applet" "marquee" "object") start-tag-applet-marquee-object)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3447
   ("xmp" start-tag-xmp)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3448
   ("table" start-tag-table)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3449
   (("area" "br" "embed" "img" "keygen" "wbr")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3450
    start-tag-void-formatting)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3451
   (("param" "source" "track") start-tag-param-source)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3452
   ("input" start-tag-input)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3453
   ("hr" start-tag-hr)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3454
   ("image" start-tag-image)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3455
   ("isindex" start-tag-is-index)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3456
   ("textarea" start-tag-textarea)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3457
   ("iframe" start-tag-i-frame)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3458
   (("noembed" "noscript") start-tag-rawtext)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3459
   ("select" start-tag-select)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3460
   (("rp" "rt") start-tag-rp-rt)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3461
   (("option" "optgroup") start-tag-opt)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3462
   (("math") start-tag-math)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3463
   (("svg") start-tag-svg)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3464
   (("caption" "col" "colgroup" "frame" "head"
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3465
               "tbody" "td" "tfoot" "th" "thead"
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3466
               "tr")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3467
    start-tag-misplaced)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3468
   (default start-tag-other))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3469
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3470
 (tagname-dispatch :in-body process-end-tag
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3471
   ("body" end-tag-body)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3472
   ("html" end-tag-html)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3473
   (("address" "article" "aside" "blockquote" "button" "center"
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3474
               "details" "dir" "div" "dl" "fieldset" "figcaption" "figure"
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3475
               "footer" "header" "hgroup" "listing" "menu" "nav" "ol" "pre"
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3476
               "section" "summary" "ul")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3477
    end-tag-block)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3478
   ("form" end-tag-form)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3479
   ("p" end-tag-p)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3480
   (("dd" "dt" "li") end-tag-list-item)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3481
   (#.+heading-elements+ end-tag-heading)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3482
   (("a" "b" "big" "code" "em" "font" "i" "nobr" "s" "small"
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3483
         "strike" "strong" "tt" "u")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3484
    end-tag-formatting)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3485
   (("applet" "marquee" "object") end-tag-applet-marquee-object)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3486
   ("br" end-tag-br)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3487
   (default end-tag-other))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3488
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3489
 (flet ((is-matching-formatting-element (node1 node2)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3490
          (and (equal (node-name node1) (node-name node2))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3491
               (equal (node-namespace node1) (node-namespace node2))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3492
               (node-attributes= node1 node2))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3493
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3494
   (def :in-body add-formatting-element (reverse active-formatting-elements)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3495
     (insert-element token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3496
     (let ((element (last-open-element))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3497
           matching-elements)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3498
       (loop for node in (reverse active-formatting-elements)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3499
             do (if (eq node :marker)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3500
                    (return)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3501
                    (when (is-matching-formatting-element node element)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3502
                      (push-end node matching-elements))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3503
       (assert (<= (length matching-elements) 3))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3504
       (when (= (length matching-elements) 3)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3505
         (setf active-formatting-elements
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3506
               (remove (car (last matching-elements))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3507
                       active-formatting-elements)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3508
       (assert element)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3509
       (push-end element active-formatting-elements))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3510
     nil))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3511
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3512
 (def :in-body process-eof (open-elements)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3513
   (let ((allowed-elements '("dd" "dt" "li" "p" "tbody" "td"
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3514
                             "tfoot" "th" "thead" "tr" "body" "html")))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3515
     (loop for node in (reverse open-elements)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3516
           do (when (not (member (node-name node)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3517
                                 allowed-elements
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3518
                                 :test #'string=))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3519
                (parser-parse-error :expected-closing-tag-but-got-eof)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3520
                (return))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3521
   nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3522
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3523
 (def :in-body process-characters (frameset-ok)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3524
   (let ((data (getf token :data)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3525
     (if (equal data (string #\u0000))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3526
         nil
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3527
         (progn
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3528
           (reconstruct-active-formatting-elements)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3529
           (parser-insert-text data)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3530
           ;;This must be bad for performance
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3531
           (when (and frameset-ok
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3532
                      (notevery (lambda (char)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3533
                                  (find char +space-characters+))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3534
                                data))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3535
             (setf frameset-ok nil))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3536
           nil))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3537
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3538
 (def :in-body process-space-characters (in-body-process-space-characters-mode)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3539
   (ecase in-body-process-space-characters-mode
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3540
     (:non-pre
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3541
      (reconstruct-active-formatting-elements)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3542
      (parser-insert-text (getf token :data)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3543
     (:drop-newline
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3544
      (let ((data (getf token :data)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3545
        (setf in-body-process-space-characters-mode :non-pre)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3546
        (when (and (plusp (length data))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3547
                   (char= #\Newline (char data 0))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3548
                   (member (node-name (last-open-element))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3549
                           '("pre" "listing" "textarea")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3550
                           :test #'string=)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3551
                   (not (node-has-content (last-open-element))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3552
          (setf data (subseq data 1)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3553
        (when (plusp (length data))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3554
          (reconstruct-active-formatting-elements)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3555
          (parser-insert-text data)))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3556
   nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3557
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3558
 (def :in-body start-tag-process-in-head ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3559
   (process-start-tag token :phase :in-head))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3560
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3561
 (def :in-body start-tag-body (frameset-ok open-elements)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3562
   (parser-parse-error :unexpected-start-tag
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3563
                       `(:name ,(getf token :name)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3564
   (if (or (= 1 (length open-elements))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3565
           (string/= (node-name (second open-elements)) "body"))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3566
       (assert (slot-value *parser* 'inner-html))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3567
       (progn
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3568
         (setf frameset-ok nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3569
         (loop for (name . value) in (getf token :data)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3570
               do (unless (element-attribute (second open-elements) name)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3571
                    (setf (element-attribute (second open-elements) name) value)))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3572
   nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3573
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3574
 (def :in-body start-tag-frameset (frameset-ok phase open-elements)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3575
   (parser-parse-error :unexpected-start-tag
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3576
                       `(:name ,(getf token :name)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3577
   (cond ((or (= 1 (length open-elements))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3578
              (string/= (node-name (second open-elements)) "body"))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3579
          (assert (slot-value *parser* 'inner-html)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3580
         ((not frameset-ok)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3581
          nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3582
         (t
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3583
          (when (node-parent (second open-elements))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3584
            (node-remove-child (node-parent (second open-elements))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3585
                               (second open-elements)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3586
          (loop until (string= (node-name (last-open-element))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3587
                               "html")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3588
                do (pop-end open-elements))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3589
          (insert-element token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3590
          (setf phase :in-frameset)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3591
   nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3592
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3593
 (def :in-body start-tag-close-p ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3594
   (when (element-in-scope "p" "button")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3595
     (end-tag-p (implied-tag-token "p")))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3596
   (insert-element token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3597
   nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3598
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3599
 (def :in-body start-tag-pre-listing (in-body-process-space-characters-mode frameset-ok)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3600
   (when (element-in-scope "p" "button")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3601
     (end-tag-p (implied-tag-token "p")))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3602
   (insert-element token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3603
   (setf frameset-ok nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3604
   (setf in-body-process-space-characters-mode :drop-newline)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3605
   nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3606
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3607
 (def :in-body start-tag-form (form-pointer)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3608
   (if form-pointer
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3609
       (parser-parse-error :unexpected-start-tag
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3610
                           `(:name ,(getf token :name)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3611
       (progn
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3612
         (when (element-in-scope "p" "button")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3613
           (end-tag-p (implied-tag-token "p")))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3614
         (insert-element token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3615
         (setf form-pointer (last-open-element))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3616
   nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3617
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3618
 (def :in-body start-tag-list-item (phase frameset-ok open-elements)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3619
   (setf frameset-ok nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3620
   (let ((stop-names (cond ((string= (getf token :name) "li")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3621
                            '("li"))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3622
                           ((string= (getf token :name) "dt")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3623
                            '("dt" "dd"))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3624
                           ((string= (getf token :name) "dd")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3625
                            '("dt" "dd")))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3626
     (loop for node in (reverse open-elements)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3627
           do (cond ((member (node-name node) stop-names :test #'string=)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3628
                     (process-end-tag (implied-tag-token (node-name node)) :phase phase)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3629
                     (return))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3630
                    ((and (member (node-name-tuple node) +special-elements+
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3631
                                  :test #'equal)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3632
                          (not (member (node-name node)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3633
                                       '("address" "div" "p")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3634
                                       :test #'string=)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3635
                     (return)))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3636
   (when (element-in-scope "p" "button")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3637
     (process-end-tag (implied-tag-token "p") :phase phase))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3638
   (insert-element token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3639
   nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3640
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3641
 (def :in-body start-tag-plaintext (tokenizer)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3642
   (when (element-in-scope "p" "button")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3643
     (end-tag-p (implied-tag-token "p")))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3644
   (insert-element token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3645
   (setf (tokenizer-state tokenizer) :plaintext-state)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3646
   nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3647
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3648
 (def :in-body start-tag-heading (open-elements)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3649
   (when (element-in-scope "p" "button")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3650
     (end-tag-p (implied-tag-token "p")))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3651
   (when (member (node-name (last-open-element)) +heading-elements+
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3652
                 :test #'string=)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3653
     (perror :unexpected-start-tag :name (getf token :name))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3654
     (pop-end open-elements))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3655
   (insert-element token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3656
   nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3657
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3658
 (def :in-body start-tag-a (open-elements active-formatting-elements)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3659
   (let ((afe-a-element (element-in-active-formatting-elements "a")))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3660
     (when afe-a-element
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3661
       (perror :unexpected-start-tag-implies-end-tag
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3662
               :start-name "a" :end-name "a")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3663
       (end-tag-formatting (implied-tag-token "a"))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3664
       (when (member afe-a-element open-elements)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3665
         (setf open-elements
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3666
               (remove afe-a-element open-elements)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3667
       (when (member afe-a-element active-formatting-elements)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3668
         (setf active-formatting-elements
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3669
               (remove afe-a-element active-formatting-elements))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3670
     (reconstruct-active-formatting-elements)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3671
     (add-formatting-element token))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3672
   nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3673
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3674
 (def :in-body start-tag-formatting ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3675
   (reconstruct-active-formatting-elements)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3676
   (add-formatting-element token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3677
   nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3678
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3679
 (def :in-body start-tag-nobr ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3680
   (reconstruct-active-formatting-elements)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3681
   (when (element-in-scope "nobr")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3682
     (perror :unexpected-start-tag-implies-end-tag
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3683
             :start-name "nobr" :end-name "nobr")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3684
     (process-end-tag (implied-tag-token "nobr"))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3685
     ;; XXX Need tests that trigger the following
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3686
     (reconstruct-active-formatting-elements))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3687
   (add-formatting-element token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3688
   nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3689
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3690
 (def :in-body start-tag-button (frameset-ok)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3691
   (cond ((element-in-scope "button")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3692
          (perror :unexpected-start-tag-implies-end-tag
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3693
                  :start-name "button" :end-name "button")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3694
          (process-end-tag (implied-tag-token "button"))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3695
          token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3696
         (t
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3697
          (reconstruct-active-formatting-elements)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3698
          (insert-element token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3699
          (setf frameset-ok nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3700
          nil)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3701
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3702
 (def :in-body start-tag-applet-marquee-object (frameset-ok active-formatting-elements)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3703
   (reconstruct-active-formatting-elements)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3704
   (insert-element token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3705
   (push-end :marker active-formatting-elements)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3706
   (setf frameset-ok nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3707
   nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3708
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3709
 (def :in-body start-tag-xmp (frameset-ok)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3710
   (when (element-in-scope "p" "button")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3711
     (end-tag-p (implied-tag-token "p")))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3712
   (reconstruct-active-formatting-elements)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3713
   (setf frameset-ok nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3714
   (parse-rc-data-raw-text token :rawtext)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3715
   nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3716
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3717
 (def :in-body start-tag-table (frameset-ok compat-mode phase)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3718
   (when (not (eq compat-mode :quirks))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3719
     (when (element-in-scope "p" "button")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3720
       (end-tag-p (implied-tag-token "p"))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3721
   (insert-element token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3722
   (setf frameset-ok nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3723
   (setf phase :in-table)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3724
   nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3725
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3726
 (def :in-body start-tag-void-formatting (frameset-ok open-elements)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3727
   (reconstruct-active-formatting-elements)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3728
   (insert-element token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3729
   (pop-end open-elements)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3730
   (setf (getf token :self-closing-acknowledged) t)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3731
   (setf frameset-ok nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3732
   nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3733
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3734
 (def :in-body start-tag-input (frameset-ok)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3735
   (let ((old-frameset-ok frameset-ok))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3736
     (start-tag-void-formatting token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3737
     (let ((type (assoc "type" (getf token :data) :test #'string=)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3738
       (when (and type
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3739
                  (string= (ascii-upper-2-lower (cdr type)) "hidden"))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3740
         ;;input type=hidden doesn't change framesetOK
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3741
         (setf frameset-ok old-frameset-ok))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3742
   nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3743
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3744
 (def :in-body start-tag-param-source (open-elements)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3745
   (insert-element token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3746
   (pop-end open-elements)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3747
   (setf (getf token :self-closing-acknowledged) t)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3748
   nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3749
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3750
 (def :in-body start-tag-hr (frameset-ok open-elements)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3751
   (when (element-in-scope "p" "button")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3752
     (end-tag-p (implied-tag-token "p")))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3753
   (insert-element token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3754
   (pop-end open-elements)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3755
   (setf (getf token :self-closing-acknowledged) t)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3756
   (setf frameset-ok nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3757
   nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3758
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3759
 (def :in-body start-tag-image ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3760
   (perror :unexpected-start-tag-treated-as
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3761
           :original-name "image" :new-name "img")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3762
   (process-start-tag (implied-tag-token/full
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3763
                       "img" :start-tag
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3764
                       :attributes (getf token :data)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3765
                       :self-closing (getf token :self-closing)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3766
   nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3767
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3768
 (def :in-body start-tag-is-index (form-pointer)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3769
   (block nil
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3770
     (perror :deprecated-tag :name "isindex")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3771
     (when form-pointer
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3772
       (return nil))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3773
     (let (attrs)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3774
       (when (assoc "action" (getf token :data) :test #'string=)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3775
         (setf attrs (list (assoc "action" (getf token :data) :test #'string=))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3776
       (process-start-tag (implied-tag-token/full "form" :start-tag
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3777
                                                  :attributes attrs)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3778
     (process-start-tag (implied-tag-token "hr" :start-tag))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3779
     (process-start-tag (implied-tag-token "label" :start-tag))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3780
     ;; XXX Localization ...
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3781
     (let ((prompt (if (assoc "prompt" (getf token :data) :test #'string=)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3782
                       (cdr (assoc "prompt" (getf token :data) :test #'string=))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3783
                       "This is a searchable index. Enter search keywords: ")))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3784
       (process-characters (list :type :characters :data prompt)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3785
     (let ((attrs (append (remove-if (lambda (el)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3786
                                       (member (car el) '("action" "prompt" "name")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3787
                                               :test #'string=))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3788
                                     (copy-list (getf token :data)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3789
                          (copy-list '(("name" . "isindex"))))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3790
       (process-start-tag (implied-tag-token/full "input" :start-tag
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3791
                                                  :attributes attrs
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3792
                                                  :self-closing
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3793
                                                  (getf token :self-closing))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3794
     (process-end-tag (implied-tag-token "label"))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3795
     (process-start-tag (implied-tag-token "hr" :start-tag))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3796
     (process-end-tag (implied-tag-token "form")))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3797
   nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3798
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3799
 (def :in-body start-tag-textarea (tokenizer
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3800
                                   in-body-process-space-characters-mode
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3801
                                   frameset-ok)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3802
   (insert-element token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3803
   (setf (tokenizer-state tokenizer) :rcdata-state)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3804
   (setf in-body-process-space-characters-mode :drop-newline)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3805
   (setf frameset-ok nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3806
   nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3807
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3808
 (def :in-body start-tag-i-frame (frameset-ok)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3809
   (setf frameset-ok nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3810
   (start-tag-rawtext token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3811
   nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3812
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3813
 (def :in-body start-tag-rawtext ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3814
   ;;;iframe, noembed noframes, noscript(if scripting enabled)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3815
   (parse-rc-data-raw-text token :rawtext)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3816
   nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3817
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3818
 (def :in-body start-tag-opt (phase)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3819
   (when (string= (node-name (last-open-element)) "option")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3820
     (process-end-tag (implied-tag-token "option") :phase phase))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3821
   (reconstruct-active-formatting-elements)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3822
   (insert-element token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3823
   nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3824
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3825
 (def :in-body start-tag-select (frameset-ok)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3826
   (reconstruct-active-formatting-elements)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3827
   (insert-element token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3828
   (setf frameset-ok nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3829
   (if (member (parser-phase *parser*) '(:in-table :in-caption :in-column-group
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3830
                                         :in-table-body :in-row :in-cell))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3831
       (setf (parser-phase *parser*) :in-select-in-table)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3832
       (setf (parser-phase *parser*) :in-select))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3833
   nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3834
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3835
 (def :in-body start-tag-rp-rt ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3836
   (when (element-in-scope "ruby")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3837
     (generate-implied-end-tags)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3838
     (when (string/= (node-name (last-open-element)) "ruby")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3839
       (perror :expected-ruby-tag)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3840
   (insert-element token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3841
   nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3842
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3843
 (def :in-body start-tag-math (open-elements)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3844
   (reconstruct-active-formatting-elements)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3845
   (adjust-math-ml-attributes token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3846
   (adjust-foreign-attributes token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3847
   (setf (getf token :namespace) (find-namespace "mathml"))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3848
   (insert-element token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3849
   ;;Need to get the parse error right for the case where the token
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3850
   ;;has a namespace not equal to the xmlns attribute
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3851
   (when (getf token :self-closing)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3852
     (pop-end open-elements)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3853
     (setf (getf token :self-closing-acknowledged) t))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3854
   nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3855
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3856
 (def :in-body start-tag-svg (open-elements)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3857
   (reconstruct-active-formatting-elements)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3858
   (adjust-svg-attributes token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3859
   (adjust-foreign-attributes token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3860
   (setf (getf token :namespace) (find-namespace "svg"))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3861
   (insert-element token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3862
   ;;Need to get the parse error right for the case where the token
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3863
   ;;has a namespace not equal to the xmlns attribute
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3864
   (when (getf token :self-closing)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3865
     (pop-end open-elements)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3866
     (setf (getf token :self-closing-acknowledged) t))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3867
   nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3868
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3869
 (def :in-body start-tag-misplaced ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3870
   ;;; Elements that should be children of other elements that have a
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3871
   ;;; different insertion mode; here they are ignored
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3872
   ;;; "caption", "col", "colgroup", "frame", "frameset", "head",
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3873
   ;;; "option", "optgroup", "tbody", "td", "tfoot", "th", "thead",
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3874
   ;;; "tr", "noscript"
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3875
   (perror :unexpected-start-tag-ignored :name (getf token :name))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3876
   nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3877
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3878
 (def :in-body start-tag-other ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3879
   (reconstruct-active-formatting-elements)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3880
   (insert-element token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3881
   nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3882
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3883
 (def :in-body end-tag-p (open-elements)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3884
   (cond ((not (element-in-scope "p" "button"))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3885
          (start-tag-close-p (implied-tag-token "p" :start-tag))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3886
          (perror :unexpected-end-tag :name "p")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3887
          (end-tag-p (implied-tag-token "p")))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3888
         (t
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3889
          (generate-implied-end-tags "p")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3890
          (when (string/= (node-name (last-open-element)) "p")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3891
            (perror :unexpected-end-tag :name "p"))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3892
          (let ((node (pop-end open-elements)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3893
            (loop until (string= (node-name node) "p")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3894
                  do (setf node (pop-end open-elements))))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3895
   nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3896
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3897
 (def :in-body end-tag-body (open-elements)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3898
   (block nil
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3899
     (when (not (element-in-scope "body"))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3900
       (perror :unexpected-scope)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3901
       (return nil))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3902
     (when (string/= (node-name (last-open-element)) "body")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3903
       (loop for node in (cddr open-elements)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3904
             do (when (member (node-name node)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3905
                              '("dd" "dt" "li" "optgroup" "option" "p" "rp"
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3906
                                "rt" "tbody" "td" "tfoot" "th" "thead" "tr"
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3907
                                "body" "html")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3908
                              :test #'string=)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3909
                  ;;Not sure this is the correct name for the parse error
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3910
                  (perror :expected-one-end-tag-but-got-another
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3911
                          :expected-name "body" :got-name (node-name node))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3912
                  (return)))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3913
   (setf (parser-phase *parser*) :after-body)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3914
   nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3915
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3916
 (def :in-body end-tag-html ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3917
   ;;We repeat the test for the body end tag token being ignored here
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3918
   (cond ((element-in-scope "body")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3919
          (end-tag-body (implied-tag-token "body"))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3920
          token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3921
         (t nil)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3922
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3923
 (def :in-body end-tag-block (in-body-process-space-characters-mode open-elements)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3924
   ;;Put us back in the right whitespace handling mode
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3925
   (when (string= (getf token :name) "pre")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3926
     (setf in-body-process-space-characters-mode :non-pre))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3927
   (let ((in-scope (element-in-scope (getf token :name))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3928
     (when in-scope
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3929
       (generate-implied-end-tags))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3930
     (when (string/= (node-name (last-open-element))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3931
                     (getf token :name))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3932
       (perror :end-tag-too-early :name (getf token :name)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3933
     (when in-scope
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3934
       (let ((node (pop-end open-elements)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3935
         (loop until (string= (node-name node) (getf token :name))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3936
               do (setf node (pop-end open-elements))))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3937
   nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3938
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3939
 (def :in-body end-tag-form (form-pointer open-elements)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3940
   (let ((node form-pointer))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3941
     (setf form-pointer nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3942
     (if (or (null node) (not (element-in-scope (node-name node))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3943
         (perror :unexpected-end-tag :name "form")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3944
         (progn
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3945
           (generate-implied-end-tags)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3946
           (when (not (equal (last-open-element) node))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3947
             (perror :end-tag-too-early-ignored :name "form"))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3948
           (setf open-elements
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3949
                 (remove node open-elements)))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3950
   nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3951
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3952
 ;;; Note to self:
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3953
 ;;;   - A token is a plist.
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3954
 ;;;   - A property is an alist.
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3955
 ;;;   - A node is an object.
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3956
 ;;;   - An element is a node.
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3957
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3958
 (def :in-body end-tag-list-item (open-elements)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3959
   (let ((variant (if (string= (getf token :name) "li")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3960
                      "list"
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3961
                      nil)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3962
     (if (not (element-in-scope (getf token :name) variant))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3963
         (perror :unexpected-end-tag :name (getf token :name))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3964
         (progn
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3965
           (generate-implied-end-tags (getf token :name))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3966
           (when (string/= (node-name (last-open-element))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3967
                           (getf token :name))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3968
             (perror :end-tag-too-early :name (getf token :name)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3969
           (let ((node (pop-end open-elements)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3970
             (loop until (string= (node-name node) (getf token :name))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3971
                   do (setf node (pop-end open-elements)))))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3972
   nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3973
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3974
 (def :in-body end-tag-heading (open-elements)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3975
   (loop for item in +heading-elements+
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3976
         do (when (element-in-scope item)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3977
              (generate-implied-end-tags)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3978
              (return)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3979
   (when (string/= (node-name (last-open-element))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3980
                   (getf token :name))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3981
     (perror :end-tag-too-early :name (getf token :name)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3982
   (loop for item in +heading-elements+
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3983
         do (when (element-in-scope item)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3984
              (let ((item (pop-end open-elements)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3985
                (loop until (member (node-name item) +heading-elements+
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3986
                                    :test #'string=)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3987
                      do (setf item (pop-end open-elements))))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3988
   nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3989
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3990
 (defmacro insert-elt-at (object index place)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3991
   (let ((tmp (gensym "TMP"))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3992
         (object-symbol (gensym "OBJECT"))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3993
         (index-symbol (gensym "INDEX")))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3994
     `(let ((,object-symbol ,object)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3995
            (,index-symbol ,index)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3996
            (,tmp ,place))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3997
        (setf ,place (append (subseq ,tmp 0 (min ,index-symbol (length ,tmp)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3998
                             (list ,object-symbol)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3999
                             (nthcdr ,index-symbol ,tmp))))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4000
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4001
 (def :in-body end-tag-formatting (active-formatting-elements open-elements)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4002
   ;; The much-feared adoption agency algorithm
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4003
   ;; http://www.whatwg.org/specs/web-apps/current-work/#adoptionAgency
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4004
   ;; XXX Better parseError messages appreciated.
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4005
   (loop named outer
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4006
         with name = (getf token :name)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4007
         with outer-loop-counter = 0
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4008
         with formatting-element
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4009
         with afe-index
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4010
         with furthest-block
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4011
         with bookmark
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4012
         with last-node
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4013
         with inner-loop-counter
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4014
         with index
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4015
         with node
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4016
         with common-ancestor
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4017
         with clone
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4018
         while (< outer-loop-counter 8)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4019
         do
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4020
         (incf outer-loop-counter)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4021
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4022
         ;; Step 1 paragraph 1
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4023
         (setf formatting-element
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4024
               (element-in-active-formatting-elements name))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4025
         (cond ((or (not formatting-element)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4026
                    (and (member formatting-element
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4027
                                 open-elements)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4028
                         (not (element-in-scope
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4029
                               (node-name formatting-element)))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4030
                (perror :adoption-agency-1.1 :name name)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4031
                (return-from outer nil))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4032
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4033
               ;; Step 1 paragraph 2
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4034
               ((not (member formatting-element
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4035
                             open-elements))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4036
                (perror :adoption-agency-1.2 :name name)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4037
                (setf active-formatting-elements
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4038
                      (remove formatting-element active-formatting-elements))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4039
                (return-from outer nil)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4040
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4041
         ;; Step 1 paragraph 3
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4042
         (unless (eql formatting-element
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4043
                      (last-open-element))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4044
           (perror :adoption-agency-1.3 :name name))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4045
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4046
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4047
         ;; Step 2
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4048
         ;; Start of the adoption agency algorithm proper
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4049
         (setf afe-index (position formatting-element
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4050
                                   open-elements))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4051
         (setf furthest-block nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4052
         (loop for element in (subseq open-elements
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4053
                                      afe-index)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4054
               do (when (member (node-name-tuple element)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4055
                                +special-elements+
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4056
                                :test #'equal)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4057
                    (setf furthest-block element)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4058
                    (return)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4059
         ;; Step 3
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4060
         (when (null furthest-block)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4061
           (loop for element = (pop-end open-elements)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4062
                 until (eql formatting-element element)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4063
                 finally (setf active-formatting-elements
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4064
                               (remove element
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4065
                                       active-formatting-elements)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4066
           (return-from outer nil))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4067
         (setf common-ancestor (elt open-elements (- afe-index 1)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4068
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4069
         ;; Step 5
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4070
         ;;if furthestBlock.parent:
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4071
         ;;    furthestBlock.parent.removeChild(furthestBlock)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4072
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4073
         ;; Step 5
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4074
         ;; The bookmark is supposed to help us
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4075
         ;; identify where to reinsert nodes in step
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4076
         ;; 12. We have to ensure that we reinsert
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4077
         ;; nodes after the node before the active
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4078
         ;; formatting element.  Note the bookmark can
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4079
         ;; move in step 7.4
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4080
         (setf bookmark (position formatting-element
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4081
                                  active-formatting-elements))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4082
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4083
         ;; Step 6
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4084
         (setf node furthest-block)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4085
         (setf last-node node)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4086
         (setf inner-loop-counter 0)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4087
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4088
         (setf index (position node open-elements))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4089
         (loop named inner
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4090
               while (< inner-loop-counter 3)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4091
               do
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4092
               (block continue
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4093
                 (incf inner-loop-counter)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4094
                 ;; Node is element before node in open elements
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4095
                 (decf index)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4096
                 (setf node (elt open-elements index))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4097
                 (when (not (member node active-formatting-elements))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4098
                   (setf open-elements
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4099
                         (remove node open-elements))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4100
                   (return-from continue))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4101
                 ;; Step 6.3
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4102
                 (when (eql node formatting-element)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4103
                   (return-from inner))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4104
                 ;; Step 6.4
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4105
                 (when (eql last-node furthest-block)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4106
                   (setf bookmark (1+ (position node
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4107
                                                active-formatting-elements))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4108
                 ;; Step 6.5
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4109
                 (setf clone (node-clone* node))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4110
                 ;; Replace node with clone
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4111
                 (symbol-macrolet
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4112
                     ((af active-formatting-elements)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4113
                      (oe open-elements))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4114
                   (setf (elt af (position node af)) clone)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4115
                   (setf (elt oe (position node oe)) clone))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4116
                 (setf node clone)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4117
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4118
                 ;; Step 6.6
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4119
                 ;; Remove lastNode from its parents, if any
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4120
                 (when (node-parent last-node)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4121
                   (node-remove-child (node-parent last-node)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4122
                                      last-node))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4123
                 (node-append-child node last-node)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4124
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4125
                 ;; Step 7.7
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4126
                 (setf last-node node)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4127
                 ;; End of inner loop
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4128
                 ))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4129
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4130
         ;; Step 7
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4131
         ;; Foster parent lastNode if commonAncestor is a
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4132
         ;; table, tbody, tfoot, thead, or tr we need to
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4133
         ;; foster parent the lastNode
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4134
         (when (node-parent last-node)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4135
           (node-remove-child (node-parent last-node)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4136
                              last-node))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4137
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4138
         (if (member (node-name common-ancestor)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4139
                     '("table" "tbody" "tfoot" "thead" "tr")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4140
                     :test #'string=)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4141
             (multiple-value-bind (parent insert-before)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4142
                 (get-table-misnested-nodeposition)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4143
               (node-insert-before* parent last-node insert-before))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4144
             (node-append-child* common-ancestor last-node))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4145
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4146
         ;; Step 8
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4147
         (setf clone (node-clone* formatting-element))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4148
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4149
         ;; Step 9
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4150
         (node-reparent-children furthest-block clone)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4151
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4152
         ;; Step 10
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4153
         (node-append-child* furthest-block clone)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4154
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4155
         ;; Step 11
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4156
         (setf active-formatting-elements
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4157
               (remove formatting-element
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4158
                       active-formatting-elements))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4159
         (insert-elt-at clone bookmark active-formatting-elements)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4160
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4161
         ;; Step 12
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4162
         (setf open-elements
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4163
               (remove formatting-element
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4164
                       open-elements))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4165
         (insert-elt-at clone
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4166
                        (1+ (position furthest-block
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4167
                                      open-elements))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4168
                        open-elements))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4169
   nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4170
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4171
 (def :in-body end-tag-applet-marquee-object (open-elements)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4172
   (when (element-in-scope (getf token :name))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4173
     (generate-implied-end-tags))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4174
   (when (string/= (node-name (last-open-element))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4175
                   (getf token :name))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4176
     (perror :end-tag-too-early :name (getf token :name)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4177
   (when (element-in-scope (getf token :name))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4178
     (let ((element (pop-end open-elements)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4179
       (loop until (string= (node-name element) (getf token :name))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4180
             do (setf element (pop-end open-elements))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4181
     (clear-active-formatting-elements))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4182
   nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4183
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4184
 (def :in-body end-tag-br (open-elements)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4185
   (perror :unexpected-end-tag-treated-as
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4186
           :original-name "br" :new-name "br element")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4187
   (reconstruct-active-formatting-elements)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4188
   (insert-element (implied-tag-token "br" :start-tag))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4189
   (pop-end open-elements)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4190
   nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4191
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4192
 (def :in-body end-tag-other (open-elements)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4193
   (loop for node in (reverse open-elements)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4194
         do (cond ((string= (node-name node) (getf token :name))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4195
                   (generate-implied-end-tags (getf token :name))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4196
                   (when (string/= (node-name (last-open-element))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4197
                                   (getf token :name))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4198
                     (perror :unexpected-end-tag :name (getf token :name)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4199
                   (loop while (not (eq node
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4200
                                        (pop-end open-elements))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4201
                   (return))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4202
                  (t
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4203
                   (when (member (node-name-tuple node) +special-elements+
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4204
                                 :test #'equal)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4205
                     (perror :unexpected-end-tag :name (getf token :name))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4206
                     (return)))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4207
   nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4208
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4209
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4210
 ;; TextPhase
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4211
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4212
 (tagname-dispatch :text process-start-tag
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4213
   (default start-tag-other))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4214
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4215
 (tagname-dispatch :text process-end-tag
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4216
   ("script" end-tag-script)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4217
   (default end-tag-other))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4218
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4219
 (def :text process-characters ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4220
   (parser-insert-text (getf token :data))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4221
   nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4222
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4223
 (def :text process-eof (phase original-phase open-elements)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4224
   (perror :expected-named-closing-tag-but-got-eof
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4225
           (node-name (last-open-element)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4226
   (pop-end open-elements)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4227
   (setf phase original-phase)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4228
   t)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4229
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4230
 (def :text start-tag-other ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4231
   (error "Tried to process start tag ~S in RCDATA/RAWTEXT mode" (getf token :name)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4232
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4233
 (def :text end-tag-script (phase original-phase open-elements)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4234
   (assert (string= (node-name (pop-end open-elements))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4235
                    "script"))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4236
   (setf phase original-phase)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4237
   ;; The rest of this method is all stuff that only happens if
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4238
   ;; document.write works
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4239
   nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4240
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4241
 (def :text end-tag-other (phase original-phase open-elements)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4242
   (pop-end open-elements)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4243
   (setf phase original-phase)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4244
   nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4245
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4246
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4247
 ;; InTablePhase
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4248
 ;; http://www.whatwg.org/specs/web-apps/current-work/#in-table
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4249
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4250
 (tagname-dispatch :in-table process-start-tag
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4251
   ("html" start-tag-html)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4252
   ("caption" start-tag-caption)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4253
   ("colgroup" start-tag-colgroup)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4254
   ("col" start-tag-col)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4255
   (("tbody" "tfoot" "thead") start-tag-row-group)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4256
   (("td" "th" "tr") start-tag-imply-tbody)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4257
   ("table" start-tag-table)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4258
   (("style" "script") start-tag-style-script)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4259
   ("input" start-tag-input)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4260
   ("form" start-tag-form)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4261
   (default start-tag-other))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4262
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4263
 (tagname-dispatch :in-table process-end-tag
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4264
   ("table" end-Tag-Table)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4265
   (("body" "caption" "col" "colgroup" "html" "tbody" "td"
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4266
            "tfoot" "th" "thead" "tr") end-Tag-Ignore)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4267
   (default end-tag-other))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4268
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4269
 (flet ((clear-stack-to-table-context ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4270
          ;; clear the stack back to a table context
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4271
          (loop until (member (node-name (last-open-element))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4272
                              '("table" "html")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4273
                              :test #'string=)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4274
             do
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4275
               ;;(perror :unexpected-implied-end-tag-in-table
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4276
               ;;        :name (node-name* (last-open-element)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4277
               (pop-end (slot-value *parser* 'open-elements)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4278
          ;; When the current node is <html> it's an innerHTML case
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4279
          ))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4280
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4281
   (def :in-table process-eof (inner-html)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4282
     (if (string/= (node-name (last-open-element)) "html")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4283
         (perror :eof-in-table)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4284
         (assert inner-html))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4285
     ;; Stop parsing
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4286
     nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4287
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4288
   (def :in-table process-space-characters (phase original-phase)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4289
     (setf original-phase phase)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4290
     (setf phase :in-table-text)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4291
     (process-space-characters token :phase phase)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4292
     nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4293
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4294
   (def :in-table process-characters (phase original-phase)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4295
     (setf original-phase phase)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4296
     (setf phase :in-table-text)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4297
     (process-characters token :phase phase)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4298
     nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4299
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4300
   (def :in-table insert-text (insert-from-table)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4301
     ;; If we get here there must be at least one non-whitespace character
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4302
     ;; Do the table magic!
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4303
     (setf insert-from-table t)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4304
     (process-characters token :phase :in-body)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4305
     (setf insert-from-table nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4306
     nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4307
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4308
   (def :in-table start-tag-caption (phase active-formatting-elements)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4309
     (clear-stack-to-table-context)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4310
     (push-end :marker active-formatting-elements)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4311
     (insert-element token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4312
     (setf phase :in-caption)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4313
     nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4314
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4315
   (def :in-table start-tag-colgroup (phase)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4316
     (clear-stack-to-table-context)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4317
     (insert-element token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4318
     (setf phase :in-column-group)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4319
     nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4320
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4321
   (def :in-table start-tag-col ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4322
     (start-tag-colgroup (implied-tag-token "colgroup" :start-tag))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4323
     token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4324
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4325
   (def :in-table start-tag-row-group (phase)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4326
     (clear-stack-to-table-context)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4327
     (insert-element token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4328
     (setf phase :in-table-body)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4329
     nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4330
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4331
   (def :in-table start-tag-imply-tbody ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4332
     (start-tag-row-group (implied-tag-token "tbody" :start-tag))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4333
     token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4334
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4335
   (def :in-table start-tag-table (phase inner-html)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4336
     (perror :unexpected-start-tag-implies-end-tag
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4337
             :start-name "table"
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4338
             :end-name "table")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4339
     (process-end-tag (implied-tag-token "table") :phase phase)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4340
     (unless inner-html
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4341
       token))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4342
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4343
   (def :in-table start-tag-style-script ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4344
     (process-start-tag token :phase :in-head))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4345
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4346
   (def :in-table start-tag-input (open-elements)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4347
     (let ((type (assoc "type" (getf token :data) :test #'string=)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4348
       (cond ((and type
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4349
                   (string= (ascii-upper-2-lower (cdr type)) "hidden"))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4350
              (perror :unexpected-hidden-input-in-table)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4351
              (insert-element token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4352
               ;; XXX associate with form
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4353
              (pop-end open-elements))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4354
             (t
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4355
              (start-tag-other token))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4356
     nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4357
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4358
   (def :in-table start-tag-form (form-pointer open-elements)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4359
     (perror :unexpected-form-in-table)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4360
     (unless form-pointer
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4361
       (insert-element token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4362
       (setf form-pointer (last-open-element))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4363
       (pop-end open-elements))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4364
     nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4365
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4366
   (def :in-table start-tag-other (insert-from-table)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4367
     (perror :unexpected-start-tag-implies-table-voodoo :name (getf token :name))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4368
     ;; Do the table magic!
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4369
     (setf insert-from-table t)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4370
     (process-start-tag token :phase :in-body)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4371
     (setf insert-from-table nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4372
     nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4373
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4374
   (def :in-table end-tag-table (inner-html open-elements)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4375
     (cond ((element-in-scope "table" "table")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4376
            (generate-implied-end-tags)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4377
            (unless (equal (node-name (last-open-element)) "table")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4378
              (perror :end-tag-too-early-named
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4379
                      :got-name "table"
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4380
                      :expected-name (node-name (last-open-element))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4381
            (loop until (equal (node-name (last-open-element)) "table")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4382
               do (pop-end open-elements))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4383
            (pop-end open-elements)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4384
            (reset-insertion-mode))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4385
           (t
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4386
            ;; innerHTML case
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4387
            (assert inner-html)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4388
            (perror :end-tag-table-in-table-inner-html-case)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4389
     nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4390
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4391
   (def :in-table end-tag-ignore ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4392
     (perror :unexpected-end-tag :name (getf token :name))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4393
     nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4394
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4395
   (def :in-table end-tag-other (insert-from-table)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4396
     (perror :unexpected-end-tag-implies-table-voodoo :name (getf token :name))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4397
     ;; Do the table magic!
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4398
     (setf insert-from-table t)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4399
     (process-end-tag token :phase :in-body)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4400
     (setf insert-from-table nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4401
     nil))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4402
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4403
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4404
 ;; InTableTextPhase
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4405
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4406
 (defun flush-characters ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4407
   (with-slots (character-tokens) *parser*
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4408
     (let ((data (apply #'concatenate 'string
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4409
                        (loop for item in (reverse character-tokens)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4410
                              collect (getf item :data)))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4411
       (if (not (only-space-characters-p data))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4412
           (insert-text (list :type :characters
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4413
                              :data data)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4414
                        :phase :in-table)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4415
           (parser-insert-text data)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4416
     (setf character-tokens nil)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4417
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4418
 (def :in-table-text process-comment (phase original-phase)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4419
   (flush-characters)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4420
   (setf phase original-phase)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4421
   token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4422
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4423
 (def :in-table-text process-eof (phase original-phase)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4424
   (flush-characters)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4425
   (setf phase original-phase)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4426
   t)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4427
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4428
 (def :in-table-text process-characters (character-tokens)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4429
   (unless (equal (getf token :data) (string #\u0000))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4430
     (push token character-tokens))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4431
   nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4432
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4433
 (def :in-table-text process-space-characters (character-tokens)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4434
   ;; pretty sure we should never reach here
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4435
   (push token character-tokens)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4436
   nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4437
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4438
 (def :in-table-text process-start-tag (phase original-phase)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4439
   (flush-characters)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4440
   (setf phase original-phase)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4441
   token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4442
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4443
 (def :in-table-text process-end-tag (phase original-phase)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4444
   (flush-characters)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4445
   (setf phase original-phase)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4446
   token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4447
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4448
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4449
 ;; InCaptionPhase
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4450
 ;; http://www.whatwg.org/specs/web-apps/current-work/#in-caption
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4451
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4452
 (tagname-dispatch :in-caption process-start-tag
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4453
   ("html" start-tag-html)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4454
   (("caption" "col" "colgroup" "tbody" "td" "tfoot" "th"
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4455
               "thead" "tr") start-tag-table-element)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4456
   (default start-tag-other))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4457
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4458
 (tagname-dispatch :in-caption process-end-tag
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4459
   ("caption" end-tag-caption)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4460
   ("table" end-tag-table)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4461
   (("body" "col" "colgroup" "html" "tbody" "td" "tfoot" "th"
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4462
            "thead" "tr") end-tag-ignore)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4463
   (default end-tag-other))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4464
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4465
 (flet ((ignore-end-tag-caption ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4466
          (not (element-in-scope "caption" "table"))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4467
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4468
   (def :in-caption process-eof ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4469
     (process-eof token :phase :in-body))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4470
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4471
   (def :in-caption process-characters ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4472
     (process-characters token :phase :in-body))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4473
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4474
   (def :in-caption start-tag-table-element (phase)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4475
     (perror :start-tag-table-element-in-caption)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4476
     ;; XXX Have to duplicate logic here to find out if the tag is ignored
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4477
     (prog1 (unless (ignore-end-tag-caption)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4478
              token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4479
       (process-end-tag (implied-tag-token "caption") :phase phase)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4480
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4481
   (def :in-caption start-tag-other ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4482
     (process-start-tag token :phase :in-body))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4483
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4484
   (def :in-caption end-tag-caption (phase inner-html open-elements)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4485
     (cond ((not (ignore-end-tag-caption))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4486
            ;; AT this code is quite similar to endTagTable in "InTable"
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4487
            (generate-implied-end-tags)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4488
            (unless (equal (node-name (last-open-element)) "caption")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4489
              (perror :expected-one-end-tag-but-got-another
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4490
                      :got-name "caption"
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4491
                      :expected-name (node-name (last-open-element))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4492
            (loop until (equal (node-name (last-open-element)) "caption")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4493
               do (pop-end open-elements))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4494
            (clear-active-formatting-elements)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4495
            (setf phase :in-table))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4496
           (t
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4497
            ;; innerHTML case
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4498
            (assert inner-html)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4499
            (perror :end-tag-caption-in-caption-inner-html-mode)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4500
     nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4501
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4502
   (def :in-caption end-tag-table (phase)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4503
     (perror :end-tag-table-in-caption)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4504
     (prog1 (unless (ignore-end-tag-caption)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4505
              token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4506
       (process-end-tag (implied-tag-token "caption") :phase phase)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4507
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4508
   (def :in-caption end-tag-ignore ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4509
     (perror :unexpected-end-tag :name (getf token :name))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4510
     nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4511
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4512
   (def :in-caption end-tag-other ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4513
     (process-end-tag token :phase :in-body)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4514
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4515
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4516
 ;; InColumnGroupPhase
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4517
 ;; http://www.whatwg.org/specs/web-apps/current-work/#in-column
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4518
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4519
 (tagname-dispatch :in-column-group process-start-tag
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4520
   ("html" start-tag-html)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4521
   ("col" start-tag-col)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4522
   (default start-tag-other))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4523
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4524
 (tagname-dispatch :in-column-group process-end-tag
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4525
   ("colgroup" end-tag-colgroup)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4526
   ("col" end-tag-col)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4527
   (default end-tag-other))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4528
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4529
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4530
 (flet ((ignore-end-tag-colgroup ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4531
          (string= (node-name (last-open-element)) "html")))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4532
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4533
   (def :in-column-group process-eof (inner-html)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4534
     (cond ((string= (node-name (last-open-element)) "html")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4535
            (assert inner-html)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4536
            nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4537
           (t
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4538
            (let ((ignore-end-tag (ignore-end-tag-colgroup)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4539
              (end-tag-colgroup (implied-tag-token "colgroup"))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4540
              (not ignore-end-tag)))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4541
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4542
   (def :in-column-group process-characters ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4543
     (prog1 (unless (ignore-end-tag-colgroup)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4544
              token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4545
       (end-tag-colgroup (implied-tag-token "colgroup"))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4546
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4547
   (def :in-column-group start-tag-col (open-elements)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4548
     (insert-element token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4549
     (pop-end open-elements)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4550
     nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4551
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4552
   (def :in-column-group start-tag-other ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4553
     (prog1 (unless (ignore-end-tag-colgroup)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4554
              token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4555
       (end-tag-colgroup (implied-tag-token "colgroup"))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4556
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4557
   (def :in-column-group end-tag-colgroup (phase open-elements)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4558
     (cond ((ignore-end-tag-colgroup)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4559
            ;; innerHTML case
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4560
            (perror :end-tag-colgroup-in-column-group-inner-html-mode))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4561
           (t
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4562
            (pop-end open-elements)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4563
            (setf phase :in-table)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4564
     nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4565
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4566
   (def :in-column-group end-tag-col ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4567
     (perror :no-end-tag :name "col")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4568
     nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4569
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4570
   (def :in-column-group end-tag-other ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4571
     (prog1 (unless (ignore-end-tag-colgroup)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4572
              token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4573
       (end-tag-colgroup (implied-tag-token "colgroup")))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4574
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4575
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4576
 ;; InTableBodyPhase
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4577
 ;; http://www.whatwg.org/specs/web-apps/current-work/#in-table0
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4578
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4579
 (tagname-dispatch :in-table-body process-start-tag
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4580
   ("html" start-tag-html)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4581
   ("tr" start-tag-tr)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4582
   (("td" "th") start-tag-table-cell)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4583
   (("caption" "col" "colgroup" "tbody" "tfoot" "thead") start-tag-table-other)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4584
   (default start-tag-other))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4585
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4586
 (tagname-dispatch :in-table-body process-end-tag
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4587
   (("tbody" "tfoot" "thead") end-Tag-Table-Row-Group)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4588
   ("table" end-Tag-Table)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4589
   (("body" "caption" "col" "colgroup" "html" "td" "th" "tr") end-Tag-Ignore)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4590
   (default end-tag-other))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4591
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4592
 (flet ((clear-stack-to-table-body-context ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4593
          (loop until (member (node-name (last-open-element))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4594
                              '("tbody" "tfoot" "thead" "html")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4595
                              :test #'string=)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4596
             do
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4597
               ;;(perror :unexpected-implied-end-tag-in-table
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4598
               ;;        :name (node-name (last-open-element)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4599
               (pop-end (slot-value *parser* 'open-elements)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4600
          (when (string= (node-name (last-open-element)) "html")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4601
            (assert (slot-value *parser* 'inner-html)))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4602
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4603
   (def :in-table-body process-eof ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4604
     (process-eof token :phase :in-table))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4605
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4606
   (def :in-table-body process-space-characters ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4607
     (process-space-characters token :phase :in-table))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4608
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4609
   (def :in-table-body process-characters ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4610
     (process-characters token :phase :in-table))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4611
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4612
   (def :in-table-body start-tag-tr (phase)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4613
     (clear-stack-to-table-body-context)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4614
     (insert-element token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4615
     (setf phase :in-row)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4616
     nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4617
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4618
   (def :in-table-body start-tag-table-cell ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4619
     (perror :unexpected-cell-in-table-body :name (getf token :name))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4620
     (start-tag-tr (implied-tag-token "tr" :start-tag))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4621
     token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4622
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4623
   (def :in-table-body start-tag-table-other (inner-html)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4624
     ;; XXX AT Any ideas on how to share this with endTagTable?
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4625
     (cond ((or (element-in-scope "tbody" "table")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4626
                (element-in-scope "thead" "table")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4627
                (element-in-scope "tfoot" "table"))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4628
            (clear-stack-to-table-body-context)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4629
            (end-tag-table-row-group
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4630
             (implied-tag-token (node-name (last-open-element))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4631
            token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4632
           (t
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4633
            ;; innerHTML case
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4634
            (assert inner-html)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4635
            (perror :start-tag-table-other-in-table-body-inner-html-mode)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4636
            nil)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4637
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4638
   (def :in-table-body start-tag-other ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4639
     (process-start-tag token :phase :in-table))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4640
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4641
   (def :in-table-body end-tag-table-row-group (phase open-elements)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4642
     (cond ((element-in-scope (getf token :name) "table")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4643
            (clear-stack-to-table-body-context)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4644
            (pop-end open-elements)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4645
            (setf phase :in-table))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4646
           (t
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4647
            (perror :unexpected-end-tag-in-table-body :name (getf token :name))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4648
     nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4649
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4650
   (def :in-table-body end-tag-table (inner-html)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4651
     (cond ((or (element-in-scope "tbody" "table")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4652
                 (element-in-scope "thead" "table")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4653
                 (element-in-scope "tfoot" "table"))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4654
            (clear-stack-to-table-body-context)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4655
            (end-tag-table-row-group
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4656
             (implied-tag-token (node-name (last-open-element))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4657
            token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4658
           (t
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4659
            ;; innerHTML case
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4660
            (assert inner-html)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4661
            (perror :end-tag-table-other-in-table-body-inner-html-mode)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4662
            nil)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4663
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4664
   (def :in-table-body end-tag-ignore ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4665
     (perror :unexpected-end-tag-in-table-body :name (getf token :name))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4666
     nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4667
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4668
   (def :in-table-body end-tag-other ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4669
     (process-end-tag token :phase :in-table)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4670
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4671
 ;; InRowPhase
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4672
 ;; http://www.whatwg.org/specs/web-apps/current-work/#in-row
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4673
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4674
 (tagname-dispatch :in-row process-start-tag
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4675
   ("html" start-tag-html)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4676
   (("td" "th") start-tag-table-cell)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4677
   (("caption" "col" "colgroup" "tbody" "tfoot" "thead" "tr")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4678
    start-tag-table-other)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4679
   (default start-tag-other))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4680
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4681
 (tagname-dispatch :in-row process-end-tag
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4682
   ("tr" end-tag-tr)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4683
   ("table" end-tag-table)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4684
   (("tbody" "tfoot" "thead") end-tag-table-row-group)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4685
   (("body" "caption" "col" "colgroup" "html" "td" "th") end-tag-ignore)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4686
   (default end-tag-other))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4687
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4688
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4689
 ;; helper methods (XXX unify this with other table helper methods)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4690
 (flet ((clear-stack-to-table-row-context ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4691
          (loop until (member (node-name (last-open-element))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4692
                              '("tr" "html")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4693
                              :test #'string=)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4694
             do
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4695
               (perror :unexpected-implied-end-tag-in-table-row
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4696
                       :name (node-name (last-open-element)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4697
               (pop-end (slot-value *parser* 'open-elements))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4698
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4699
        (ignore-end-tag-tr ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4700
          (not (element-in-scope "tr" "table"))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4701
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4702
   ;; the rest
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4703
   (def :in-row process-eof ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4704
     (process-eof token :phase :in-table)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4705
     nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4706
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4707
   (def :in-row process-space-characters ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4708
     (process-space-characters token :phase :in-table))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4709
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4710
   (def :in-row process-characters ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4711
     (process-characters token :phase :in-table))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4712
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4713
   (def :in-row start-tag-table-cell (phase active-formatting-elements)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4714
     (clear-stack-to-table-row-context)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4715
     (insert-element token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4716
     (setf phase :in-cell)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4717
     (push-end :marker active-formatting-elements)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4718
     nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4719
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4720
   (def :in-row start-tag-table-other ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4721
     (let ((ignore-end-tag (ignore-end-tag-tr)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4722
       (end-tag-tr (implied-tag-token "tr"))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4723
        ;; XXX how are we sure it's always ignored in the innerHTML case?
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4724
       (unless ignore-end-tag
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4725
         token)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4726
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4727
   (def :in-row start-tag-other ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4728
     (process-start-tag token :phase :in-table))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4729
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4730
   (def :in-row end-tag-tr (phase inner-html open-elements)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4731
     (cond ((not (ignore-end-tag-tr))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4732
            (clear-stack-to-table-row-context)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4733
            (pop-end open-elements)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4734
            (setf phase :in-table-body))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4735
           (t
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4736
            ;; innerHTML case
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4737
            (assert inner-html)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4738
            (perror :end-tag-tr-inner-html-mode)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4739
     nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4740
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4741
   (def :in-row end-tag-table ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4742
     (let ((ignore-end-tag (ignore-end-tag-tr)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4743
       (end-tag-tr (implied-tag-token "tr"))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4744
       ;; Reprocess the current tag if the tr end tag was not ignored
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4745
       ;; XXX how are we sure it's always ignored in the innerHTML case?
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4746
       (unless ignore-end-tag
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4747
         token)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4748
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4749
   (def :in-row end-tag-table-row-group ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4750
     (cond ((element-in-scope (getf token :name) "table")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4751
            (end-tag-tr (implied-tag-token "tr"))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4752
            token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4753
           (t
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4754
            (perror :end-tag-table-row-group-something-wrong)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4755
            nil)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4756
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4757
   (def :in-row end-tag-ignore ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4758
     (perror :unexpected-end-tag-in-table-row (getf token :name))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4759
     nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4760
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4761
   (def :in-row end-tag-other ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4762
     (process-end-tag token :phase :in-table)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4763
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4764
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4765
 ;; InCellPhase
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4766
 ;; http://www.whatwg.org/specs/web-apps/current-work/#in-cell
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4767
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4768
 (tagname-dispatch :in-cell process-start-tag
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4769
   ("html" start-tag-html)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4770
   (("caption" "col" "colgroup" "tbody" "td" "tfoot" "th" "thead" "tr")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4771
    start-tag-table-other)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4772
   (default start-tag-other))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4773
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4774
 (tagname-dispatch :in-cell process-end-tag
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4775
   (("td" "th") end-tag-table-cell)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4776
   (("body" "caption" "col" "colgroup" "html") end-tag-ignore)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4777
   (("table" "tbody" "tfoot" "thead" "tr") end-tag-imply)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4778
   (default end-tag-other))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4779
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4780
 (flet ((close-cell ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4781
          (if (element-in-scope "td" "table")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4782
              (end-tag-table-cell (implied-tag-token "td"))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4783
              (if (element-in-scope "th" "table")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4784
                  (end-tag-table-cell (implied-tag-token "th"))))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4785
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4786
   (def :in-cell process-eof ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4787
     (process-eof token :phase :in-body)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4788
     nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4789
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4790
   (def :in-cell process-characters ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4791
     (process-characters token :phase :in-body))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4792
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4793
   (def :in-cell start-tag-table-other (inner-html)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4794
     (cond ((or (element-in-scope "td" "table")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4795
                (element-in-scope "th" "table"))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4796
            (close-cell)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4797
            token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4798
           (t
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4799
            ;; innerHTML case
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4800
            (assert inner-html)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4801
            (perror :start-tag-table-other-in-inner-html-mode)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4802
            nil)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4803
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4804
   (def :in-cell start-tag-other ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4805
     (process-start-tag token :phase :in-body))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4806
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4807
   (def :in-cell end-tag-table-cell (phase open-elements)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4808
     (cond ((element-in-scope (getf token :name) "table")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4809
            (generate-implied-end-tags (getf token :name))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4810
            (cond ((not (equal (node-name (last-open-element))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4811
                               (getf token :name)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4812
                   (perror :unexpected-cell-end-tag :name (getf token :name))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4813
                   (loop until (equal (node-name (pop-end open-elements))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4814
                                      (getf token :name))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4815
                  (t
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4816
                   (pop-end open-elements)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4817
            (clear-active-formatting-elements)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4818
            (setf phase :in-row))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4819
           (t
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4820
            (perror :unexpected-end-tag :name (getf token :name))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4821
     nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4822
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4823
   (def :in-cell end-tag-ignore ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4824
     (perror :unexpected-end-tag :name (getf token :name))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4825
     nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4826
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4827
   (def :in-cell end-tag-imply ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4828
     (cond ((element-in-scope (getf token :name) "table")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4829
            (close-cell)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4830
            token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4831
           (t
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4832
            ;; sometimes innerHTML case
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4833
            (perror :end-tag-imply-sometimes-inner-html-case)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4834
            nil)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4835
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4836
   (def :in-cell end-tag-other ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4837
     (process-end-tag token :phase :in-body)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4838
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4839
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4840
 ;; InSelectPhase
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4841
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4842
 (tagname-dispatch :in-select process-start-tag
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4843
   ("html" start-tag-html)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4844
   ("option" start-tag-option)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4845
   ("optgroup" start-tag-optgroup)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4846
   ("select" start-tag-select)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4847
   (("input" "keygen" "textarea") start-tag-input)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4848
   ("script" start-tag-script)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4849
   (default start-tag-other))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4850
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4851
 (tagname-dispatch :in-select process-end-tag
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4852
   ("option" end-tag-option)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4853
   ("optgroup" end-tag-optgroup)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4854
   ("select" end-tag-select)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4855
   (default end-tag-other))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4856
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4857
 ;; http://www.whatwg.org/specs/web-apps/current-work/#in-select
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4858
 (def :in-select process-eof (inner-html)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4859
   (if (not (equal (node-name (last-open-element)) "html"))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4860
       (perror :eof-in-select)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4861
       (assert inner-html))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4862
   nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4863
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4864
 (def :in-select process-characters ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4865
   (unless (equal (getf token :data) (string #\u0000))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4866
     (parser-insert-text (getf token :data)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4867
   nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4868
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4869
 (def :in-select start-tag-option (open-elements)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4870
   ;; We need to imply </option> if <option> is the current node.
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4871
   (when (equal (node-name (last-open-element)) "option")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4872
     (pop-end open-elements))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4873
   (insert-element token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4874
   nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4875
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4876
 (def :in-select start-tag-optgroup (open-elements)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4877
   (when (equal (node-name (last-open-element)) "option")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4878
     (pop-end open-elements))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4879
   (when (equal (node-name (last-open-element)) "optgroup")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4880
     (pop-end open-elements))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4881
   (insert-element token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4882
   nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4883
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4884
 (def :in-select start-tag-select ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4885
   (perror :unexpected-select-in-select)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4886
   (end-tag-select (implied-tag-token "select"))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4887
   nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4888
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4889
 (def :in-select start-tag-input (inner-html)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4890
   (perror :unexpected-input-in-select)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4891
   (cond ((element-in-scope "select" "select")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4892
          (end-tag-select (implied-tag-token "select"))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4893
          token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4894
         (t
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4895
          (assert inner-html)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4896
          nil)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4897
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4898
 (def :in-select start-tag-script ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4899
   (process-start-tag token :phase :in-head))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4900
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4901
 (def :in-select start-tag-other ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4902
   (perror :unexpected-start-tag-in-select :name (getf token :name))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4903
   nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4904
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4905
 (def :in-select end-tag-option (open-elements)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4906
   (if (equal (node-name (last-open-element)) "option")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4907
       (pop-end open-elements)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4908
       (perror :unexpected-end-tag-in-select :name (getf token :name)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4909
   nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4910
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4911
 (def :in-select end-tag-optgroup (open-elements)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4912
   ;; </optgroup> implicitly closes <option>
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4913
   (when  (and (equal (node-name (last-open-element)) "option")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4914
               (equal (node-name (elt open-elements
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4915
                                      (- (length open-elements) 2)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4916
                      "optgroup"))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4917
     (pop-end open-elements))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4918
   ;; It also closes </optgroup>
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4919
   (if (equal (node-name (last-open-element)) "optgroup")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4920
       (pop-end open-elements)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4921
       ;; But nothing else
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4922
       (perror :unexpected-end-tag-in-select :name (getf token :name)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4923
   nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4924
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4925
 (def :in-select end-tag-select (inner-html open-elements)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4926
   (cond ((element-in-scope "select" "select")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4927
          (loop until (equal (node-name (pop-end open-elements))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4928
                             "select"))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4929
          (reset-insertion-mode))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4930
         (t
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4931
          ;; innerHTML case
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4932
          (assert inner-html)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4933
          (perror :end-tag-select-in-inner-html-mode)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4934
   nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4935
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4936
 (def :in-select end-tag-other ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4937
   (perror :unexpected-end-tag-in-select :name (getf token :name))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4938
   nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4939
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4940
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4941
 ;; InSelectInTablePhase
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4942
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4943
 (tagname-dispatch :in-select-in-table process-start-tag
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4944
   (("caption" "table" "tbody" "tfoot" "thead" "tr" "td" "th") start-tag-table)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4945
   (default start-tag-other))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4946
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4947
 (tagname-dispatch :in-select-in-table process-end-tag
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4948
   (("caption" "table" "tbody" "tfoot" "thead" "tr" "td" "th") end-tag-table)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4949
   (default end-tag-other))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4950
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4951
 (def :in-select-in-table process-eof ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4952
   (process-eof token :phase :in-select)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4953
   nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4954
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4955
 (def :in-select-in-table process-characters ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4956
   (process-characters token :phase :in-select))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4957
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4958
 (def :in-select-in-table start-tag-table ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4959
   (perror :unexpected-table-element-start-tag-in-select-in-table :name (getf token :name))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4960
   (end-tag-other (implied-tag-token "select"))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4961
   token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4962
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4963
 (def :in-select-in-table start-tag-other ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4964
   (process-start-tag token :phase :in-select))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4965
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4966
 (def :in-select-in-table end-tag-table ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4967
   (perror :unexpected-table-element-end-tag-in-select-in-table :name (getf token :name))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4968
   (cond ((element-in-scope (getf token :name) "table")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4969
          (end-tag-other (implied-tag-token "select"))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4970
          token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4971
         (t
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4972
          nil)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4973
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4974
 (def :in-select-in-table end-tag-other ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4975
   (process-end-tag token :phase :in-select))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4976
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4977
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4978
 ;; InForeignContentPhase
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4979
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4980
 (defparameter +breakout-elements+
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4981
   '("b" "big" "blockquote" "body" "br"
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4982
     "center" "code" "dd" "div" "dl" "dt"
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4983
     "em" "embed" "h1" "h2" "h3"
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4984
     "h4" "h5" "h6" "head" "hr" "i" "img"
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4985
     "li" "listing" "menu" "meta" "nobr"
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4986
     "ol" "p" "pre" "ruby" "s"  "small"
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4987
     "span" "strong" "strike"  "sub" "sup"
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4988
     "table" "tt" "u" "ul" "var"))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4989
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4990
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4991
 (defun adjust-svg-tag-names (token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4992
   (let ((replacement (cdr
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4993
                       (assoc (getf token :name)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4994
                              '(("altglyph" . "altGlyph")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4995
                                ("altglyphdef" . "altGlyphDef")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4996
                                ("altglyphitem" . "altGlyphItem")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4997
                                ("animatecolor" . "animateColor")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4998
                                ("animatemotion" . "animateMotion")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4999
                                ("animatetransform" . "animateTransform")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5000
                                ("clippath" . "clipPath")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5001
                                ("feblend" . "feBlend")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5002
                                ("fecolormatrix" . "feColorMatrix")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5003
                                ("fecomponenttransfer" . "feComponentTransfer")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5004
                                ("fecomposite" . "feComposite")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5005
                                ("feconvolvematrix" . "feConvolveMatrix")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5006
                                ("fediffuselighting" . "feDiffuseLighting")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5007
                                ("fedisplacementmap" . "feDisplacementMap")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5008
                                ("fedistantlight" . "feDistantLight")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5009
                                ("feflood" . "feFlood")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5010
                                ("fefunca" . "feFuncA")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5011
                                ("fefuncb" . "feFuncB")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5012
                                ("fefuncg" . "feFuncG")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5013
                                ("fefuncr" . "feFuncR")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5014
                                ("fegaussianblur" . "feGaussianBlur")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5015
                                ("feimage" . "feImage")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5016
                                ("femerge" . "feMerge")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5017
                                ("femergenode" . "feMergeNode")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5018
                                ("femorphology" . "feMorphology")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5019
                                ("feoffset" . "feOffset")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5020
                                ("fepointlight" . "fePointLight")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5021
                                ("fespecularlighting" . "feSpecularLighting")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5022
                                ("fespotlight" . "feSpotLight")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5023
                                ("fetile" . "feTile")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5024
                                ("feturbulence" . "feTurbulence")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5025
                                ("foreignobject" . "foreignObject")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5026
                                ("glyphref" . "glyphRef")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5027
                                ("lineargradient" . "linearGradient")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5028
                                ("radialgradient" . "radialGradient")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5029
                                ("textpath" . "textPath"))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5030
                              :test #'string=))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5031
     (when replacement
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5032
       (setf (getf token :name) replacement))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5033
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5034
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5035
 (defparameter +only-space-characters-regexp+
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5036
   (cl-ppcre:create-scanner `(:sequence :start-anchor
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5037
                                        (:greedy-repetition
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5038
                                         0 nil
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5039
                                         (:alternation ,@(coerce +space-characters+ 'list)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5040
                                        :end-anchor)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5041
                            :multi-line-mode t))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5042
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5043
 (defun only-space-characters-p (string)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5044
   (cl-ppcre:scan +only-space-characters-regexp+ string))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5045
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5046
 (def :in-foreign-content process-characters (frameset-ok)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5047
   (cond ((equal (getf token :data) (string #\u0000))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5048
          (setf (getf token :data) (string #\uFFFD)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5049
         ((and frameset-ok
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5050
               (not (only-space-characters-p (getf token :data))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5051
          (setf frameset-ok nil)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5052
   (process-characters token :phase nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5053
   nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5054
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5055
 (def :in-foreign-content process-start-tag (html-namespace open-elements)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5056
   (block nil
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5057
     (let ((current-node (last-open-element)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5058
       (cond ((or (member (getf token :name) +breakout-elements+ :test #'string=)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5059
                  (and (string= (getf token :name) "font")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5060
                       (intersection (mapcar #'car (getf token :data))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5061
                                     '("color" "face" "size")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5062
                                     :test #'string=)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5063
              (parser-parse-error :unexpected-html-element-in-foreign-content
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5064
                                  (getf token :name))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5065
              (loop until (or (is-html-integration-point (last-open-element))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5066
                              (is-math-ml-text-integration-point (last-open-element))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5067
                              (equal (node-namespace (last-open-element))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5068
                                     html-namespace))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5069
                    do (pop-end open-elements))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5070
              (return token))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5071
             (t
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5072
              (cond ((equal (node-namespace current-node) (find-namespace "mathml"))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5073
                     (adjust-math-ml-attributes token))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5074
                    ((equal (node-namespace current-node) (find-namespace "svg"))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5075
                     (adjust-svg-tag-names token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5076
                     (adjust-svg-attributes token)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5077
              (adjust-foreign-attributes token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5078
              (setf (getf token :namespace) (node-namespace current-node))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5079
              (insert-element token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5080
              (when (getf token :self-closing)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5081
                (pop-end open-elements)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5082
                (setf (getf token :self-closing-acknowledged) t)))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5083
     nil))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5084
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5085
 (def :in-foreign-content process-end-tag (phase original-phase html-namespace open-elements)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5086
   (let ((new-token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5087
         (node-index (1- (length open-elements)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5088
         (node (last-open-element)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5089
     (unless (string= (node-name node) (getf token :name))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5090
       (parser-parse-error :unexpected-end-tag (getf token :name)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5091
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5092
     (loop
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5093
      (when (string= (ascii-upper-2-lower (node-name node)) (getf token :name))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5094
        ;; XXX this isn't in the spec but it seems necessary
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5095
        (when (eql phase :in-table-text)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5096
          (flush-characters)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5097
          (setf phase original-phase))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5098
        (loop until (eql (pop-end open-elements) node)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5099
              do (assert open-elements))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5100
        (setf new-token nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5101
        (return))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5102
      (decf node-index)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5103
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5104
      (setf node (elt open-elements node-index))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5105
      (when (equal (node-namespace node)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5106
                   html-namespace)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5107
        (setf new-token (process-end-tag token :phase phase))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5108
        (return)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5109
     new-token))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5110
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5111
 ;; AfterBodyPhase
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5112
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5113
 (tagname-dispatch :after-body process-start-tag
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5114
   ("html" start-tag-html)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5115
   (default start-tag-other))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5116
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5117
 (tagname-dispatch :after-body process-end-tag
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5118
   ("html" end-tag-html)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5119
   (default end-tag-other))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5120
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5121
 (def :after-body process-eof ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5122
   ;; Stop parsing
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5123
   nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5124
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5125
 (def :after-body process-comment (open-elements)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5126
   ;; This is needed because data is to be appended to the <html> element
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5127
   ;; here and not to whatever is currently open.
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5128
   (insert-comment token (first open-elements))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5129
   nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5130
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5131
 (def :after-body process-characters (phase)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5132
   (parser-parse-error :unexpected-char-after-body)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5133
   (setf phase :in-body)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5134
   token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5135
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5136
 (def :after-body start-tag-html ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5137
   (process-start-tag token :phase :in-body))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5138
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5139
 (def :after-body start-tag-other (phase)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5140
    (parser-parse-error :unexpected-start-tag-after-body
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5141
                        `(:name ,(getf token :name)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5142
    (setf phase :in-body)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5143
    token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5144
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5145
 (def :after-body end-tag-html (inner-html phase)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5146
   (if inner-html
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5147
       (parser-parse-error :unexpected-end-tag-after-body-innerhtml)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5148
       (setf phase :after-after-body))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5149
   nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5150
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5151
 (def :after-body end-tag-other (phase)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5152
   (parser-parse-error :unexpected-end-tag-after-body
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5153
                       `(:name ,(getf token :name)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5154
   (setf phase :in-body)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5155
   token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5156
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5157
 ;; InFramesetPhase
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5158
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5159
 (tagname-dispatch :in-frameset process-start-tag
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5160
   ("html" start-tag-html)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5161
   ("frameset" start-tag-frameset)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5162
   ("frame" start-tag-frame)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5163
   ("noframes"start-tag-noframes)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5164
   (default start-tag-other))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5165
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5166
 (tagname-dispatch :in-frameset process-end-tag
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5167
   ("frameset" end-tag-frameset)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5168
   (default end-tag-other))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5169
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5170
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5171
 (def :in-frameset process-eof (inner-html)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5172
   (if (string/= (node-name (last-open-element)) "html")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5173
       (parser-parse-error :eof-in-frameset)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5174
       (assert inner-html))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5175
   nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5176
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5177
 (def :in-frameset process-characters ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5178
   (parser-parse-error :unexpected-char-in-frameset)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5179
   nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5180
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5181
 (def :in-frameset start-tag-frameset ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5182
   (insert-element token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5183
   nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5184
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5185
 (def :in-frameset start-tag-frame (open-elements)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5186
   (insert-element token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5187
   (pop-end open-elements)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5188
   nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5189
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5190
 (def :in-frameset start-tag-noframes ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5191
   (process-start-tag token :phase :in-body))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5192
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5193
 (def :in-frameset start-tag-other ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5194
   (parser-parse-error :unexpected-start-tag-in-frameset
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5195
                       `(:name ,(getf token :name)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5196
   nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5197
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5198
 (def :in-frameset end-tag-frameset (phase inner-html open-elements)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5199
   (if (string= (node-name (last-open-element)) "html")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5200
       ;; innerHTML case
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5201
       (parser-parse-error :unexpected-frameset-in-frameset-innerhtml)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5202
       (pop-end open-elements))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5203
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5204
   (when (and (not inner-html)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5205
              (string/= (node-name (last-open-element)) "frameset"))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5206
     ;; If we're not in innerHTML mode and the the current node is not a
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5207
     ;; "frameset" element (anymore) then switch.
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5208
     (setf phase :after-frameset))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5209
   nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5210
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5211
 (def :in-frameset end-tag-other ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5212
   (parser-parse-error :unexpected-end-tag-in-frameset
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5213
                       `(:name ,(getf token :name)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5214
   nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5215
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5216
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5217
 ;; AfterFramesetPhase
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5218
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5219
 (tagname-dispatch :after-frameset process-start-tag
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5220
   ("html" start-tag-html)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5221
   ("noframes" start-tag-noframes)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5222
   (default start-tag-other))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5223
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5224
 (tagname-dispatch :after-frameset process-end-tag
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5225
   ("html" end-tag-html)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5226
   (default end-tag-other))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5227
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5228
 (def :after-frameset process-eof ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5229
   ;; Stop parsing
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5230
   nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5231
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5232
 (def :after-frameset process-characters ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5233
   (parser-parse-error :unexpected-char-after-frameset)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5234
   nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5235
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5236
 (def :after-frameset start-tag-noframes ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5237
   (process-start-tag token :phase :in-head))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5238
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5239
 (def :after-frameset start-tag-other ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5240
   (parser-parse-error :unexpected-start-tag-after-frameset
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5241
                       `(:name ,(getf token :name)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5242
   nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5243
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5244
 (def :after-frameset end-tag-html (phase)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5245
   (setf phase :after-after-frameset)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5246
   nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5247
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5248
 (def :after-frameset end-tag-other ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5249
   (parser-parse-error :unexpected-end-tag-after-frameset
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5250
                       `(:name ,(getf token :name)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5251
   nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5252
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5253
 ;; AfterAfterBodyPhase
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5254
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5255
 (tagname-dispatch :after-after-body process-start-tag
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5256
   ("html" start-tag-html)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5257
   (default start-tag-other))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5258
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5259
 (def :after-after-body process-eof ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5260
   nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5261
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5262
 (def :after-after-body process-comment ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5263
   (insert-comment token (document*))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5264
   nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5265
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5266
 (def :after-after-body process-space-characters ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5267
   (process-space-characters token :phase :in-body))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5268
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5269
 (def :after-after-body process-characters (phase)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5270
   (parser-parse-error :expected-eof-but-got-char)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5271
   (setf phase :in-body)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5272
   token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5273
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5274
 (def :after-after-body start-tag-html ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5275
   (process-start-tag token :phase :in-body))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5276
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5277
 (def :after-after-body start-tag-other (phase)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5278
   (parser-parse-error :expected-eof-but-got-start-tag
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5279
                       `(:name (getf token :name)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5280
   (setf phase :in-body)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5281
   token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5282
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5283
 (def :after-after-body process-end-tag (phase)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5284
   (parser-parse-error :expected-eof-but-got-end-tag
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5285
                       `(:name (getf token :name)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5286
   (setf phase :in-body)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5287
   token)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5288
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5289
 ;; AfterAfterFramesetPhase
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5290
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5291
 (tagname-dispatch :after-after-frameset process-start-tag
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5292
   ("html" start-tag-html)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5293
   ("noframes" start-tag-noframes)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5294
   (default start-tag-other))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5295
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5296
 (def :after-after-frameset process-eof ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5297
   nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5298
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5299
 (def :after-after-frameset process-comment ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5300
   (insert-comment token (document*))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5301
   nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5302
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5303
 (def :after-after-frameset process-space-characters ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5304
   (process-space-characters token :phase :in-body))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5305
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5306
 (def :after-after-frameset process-characters ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5307
   (parser-parse-error :expected-eof-but-got-char)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5308
   nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5309
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5310
 (def :after-after-frameset start-tag-html ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5311
   (process-start-tag token :phase :in-body))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5312
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5313
 (def :after-after-frameset start-tag-noframes ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5314
   (process-start-tag token :phase :in-head))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5315
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5316
 (def :after-after-frameset start-tag-other ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5317
   (parser-parse-error :expected-eof-but-got-start-tag
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5318
                       `(:name (getf token :name)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5319
   nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5320
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5321
 (def :after-after-frameset process-end-tag ()
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5322
   (parser-parse-error :expected-eof-but-got-end-tag
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5323
                       `(:name (getf token :name)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5324
   nil)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5325
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5326
 ;;; toxml
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5327
 (defun xml-escape-name (name)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5328
   "Escapes a node name (element, attribute, doctype) by replacing any
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5329
 character not valid in XML name by Uxxxxxx, where x is the code point
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5330
 as six hex digits. This encoding is reversable, since the HTML parser
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5331
 down cases all characters in names.
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5332
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5333
 See: https://www.w3.org/TR/html5/syntax.html#coercing-an-html-dom-into-an-infoset"
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5334
   (if (and (xml-name-start-char-p (char name 0))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5335
            (every #'xml-name-char-p name))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5336
       name
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5337
       (with-output-to-string (out)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5338
         (loop for first = t then nil
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5339
               for c across name do
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5340
                 (if (if first
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5341
                         (xml-name-start-char-p c)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5342
                         (xml-name-char-p c))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5343
                     (princ c out)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5344
                     (format out "U~:@(~6,'0X~)" (char-code c)))))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5345
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5346
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5347
 (defun xml-unescape-name (name)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5348
   "Reverert escaping done by xml-unescape-name."
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5349
   (cl-ppcre:regex-replace-all
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5350
    "U[0-9A-F]{6}"
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5351
    name
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5352
    (lambda (u)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5353
      (string (code-char (parse-integer u :start 1 :radix 16))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5354
    :simple-calls t))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5355
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5356
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5357
 (defun xml-name-start-char-p (c)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5358
   (or (char<= #\a c #\z)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5359
       (char= #\_ c)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5360
       (char<= #\A c #\Z)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5361
       (char<= (code-char #xC0) c (code-char #xD6))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5362
       (char<= (code-char #xD8) c (code-char #xF6))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5363
       (char<= (code-char #xF8) c (code-char #x2FF))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5364
       (char<= (code-char #x370) c (code-char #x37D))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5365
       (char<= (code-char #x37F) c (code-char #x1FFF))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5366
       (char<= (code-char #x200C) c (code-char #x200D))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5367
       (char<= (code-char #x2070) c (code-char #x218F))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5368
       (char<= (code-char #x2C00) c (code-char #x2FEF))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5369
       (char<= (code-char #x3001) c (code-char #xD7FF))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5370
       (char<= (code-char #xF900) c (code-char #xFDCF))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5371
       (char<= (code-char #xFDF0) c (code-char #xFFFD))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5372
       (char<= (code-char #x10000) c (code-char #xEFFFF))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5373
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5374
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5375
 (defun xml-name-char-p (c)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5376
   (or (xml-name-start-char-p c)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5377
       (char= #\- c)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5378
       (char= #\. c)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5379
       (char<= #\0 c #\9)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5380
       (char= (code-char #xB7) c)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5381
       (char<= (code-char #x0300) c (code-char #x036F))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5382
       (char<= (code-char #x203F) c (code-char #x2040))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5383
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5384
 ;;; XML DOM
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5385
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5386
 (defmethod transform-html5-dom ((to-type (eql :xmls)) node
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5387
                                 &key namespace comments)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5388
   "Convert a node into an XMLS-compatible tree of conses, starting
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5389
 at. If the node is a document-fragement a list of XMLS trees is returned."
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5390
   (labels ((node-to-xmls (node parent-ns xlink-defined)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5391
            (ecase (node-type node)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5392
              (:document
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5393
               (let (root)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5394
                 (element-map-children (lambda (n)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5395
                                         (when (string= (node-name n) "html")
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5396
                                           (setf root n)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5397
                                       node)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5398
                 (assert root)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5399
                 (node-to-xmls root parent-ns xlink-defined)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5400
              (:document-fragment
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5401
               (let (xmls-nodes)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5402
                 (element-map-children (lambda (node)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5403
                                         (push (node-to-xmls node parent-ns xlink-defined)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5404
                                               xmls-nodes))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5405
                                       node)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5406
                 (nreverse xmls-nodes)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5407
              (:element
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5408
               (let (attrs children)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5409
                 (element-map-attributes (lambda (name node-namespace value)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5410
                                           (when (and namespace
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5411
                                                      (not xlink-defined)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5412
                                                      (equal node-namespace (find-namespace "xlink")))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5413
                                             (push '#.(list "xmlns:xlink" (find-namespace "xlink")) attrs)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5414
                                             (setf xlink-defined t))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5415
                                           (push (list (if node-namespace
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5416
                                                           name
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5417
                                                           (xml-escape-name name))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5418
                                                       value)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5419
                                                 attrs))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5420
                                         node)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5421
                 (element-map-children (lambda (c)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5422
                                         (push c children))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5423
                                       node)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5424
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5425
                 (apply #'list
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5426
                        (if (and namespace
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5427
                                 (not (equal parent-ns (node-namespace node))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5428
                            (cons (node-name node) (node-namespace node))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5429
                            (xml-escape-name (node-name node)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5430
                        attrs
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5431
                        (mapcar (lambda (c)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5432
                                  (node-to-xmls c (node-namespace node) xlink-defined))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5433
                                (nreverse children)))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5434
              (:text
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5435
               (node-value node))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5436
              (:comment
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5437
               (when comments
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5438
                 (list :comment nil (node-value node)))))))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5439
     (node-to-xmls node nil nil)))
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5440
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5441
 
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5442
 (defmethod transform-html5-dom ((to-type (eql :xmls-ns)) node &key)
a3b65a8138ac html,http init, uuid, db stuff
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5443
   (transform-html5-dom :xmls node :namespace t))