changelog shortlog graph tags branches changeset file revisions annotate raw help

Mercurial > core / lisp/ffi/tree-sitter/api.lisp

revision 140: 9b7ec8636a2d
child 141: 1f2cc49dbec6
     1.1--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2+++ b/lisp/ffi/tree-sitter/api.lisp	Thu Dec 28 22:03:26 2023 -0500
     1.3@@ -0,0 +1,133 @@
     1.4+;;; ffi/tree-sitter/api.lisp --- High-level API for Tree-sitter library
     1.5+
     1.6+;; High-level Tree-sitter API
     1.7+
     1.8+;;; Code:
     1.9+(in-package :tree-sitter)
    1.10+
    1.11+(defstruct (node (:type list))
    1.12+  type range children)
    1.13+
    1.14+(defun make-lisp-name (string)
    1.15+  (intern (string-upcase (substitute #\- #\_ string))
    1.16+          (load-time-value (find-package "KEYWORD"))))
    1.17+
    1.18+(define-condition tree-sitter-error (error)
    1.19+  ())
    1.20+
    1.21+(define-condition create-parser-error (tree-sitter-error)
    1.22+  ())
    1.23+
    1.24+(define-condition set-language-error (tree-sitter-error)
    1.25+  ((language :initarg :language :reader tree-sitter-error-language)))
    1.26+
    1.27+(define-condition parse-string-error (tree-sitter-error)
    1.28+  ((string :initarg :string :reader tree-sitter-error-string)
    1.29+   (string-start :initarg :string-start :reader tree-sitter-error-string-start)
    1.30+   (string-end :initarg :string-end :reader tree-sitter-error-string-end)
    1.31+   (language :initarg :language :reader tree-sitter-error-language)))
    1.32+
    1.33+(define-condition null-node-pointer (tree-sitter-error)
    1.34+  ())
    1.35+
    1.36+(define-condition null-tree-cursor-pointer (tree-sitter-error)
    1.37+  ())
    1.38+
    1.39+;; util
    1.40+(defmacro with-ts-node-pointer ((var node) &body forms)
    1.41+  `(let ((,var ,node))
    1.42+     (when (sb-alien:null-alien ,var)
    1.43+       (error 'null-node-pointer))
    1.44+     (unwind-protect
    1.45+          (progn ,@forms)
    1.46+       (sb-alien:free-alien ,var))))
    1.47+
    1.48+(defmacro with-tree-cursor-pointer ((var tree) &body forms &aux (node (gensym)))
    1.49+  `(with-ts-node-pointer (,node (ts-tree-root-node-pointer ,tree))
    1.50+     (let ((,var (ts-tree-cursor-new-pointer ,node)))
    1.51+       (when (sb-alien:null-alien ,var)
    1.52+         (error 'null-tree-cursor-pointer))
    1.53+       (unwind-protect
    1.54+            (progn ,@forms)
    1.55+         (ts-tree-cursor-delete ,var)))))
    1.56+
    1.57+(defun parse-string (language string &key (start 0) end produce-cst (name-generator #'make-lisp-name))
    1.58+  "Parse a STRING that represents LANGUAGE code using tree-sitter. START is
    1.59+where to start parsing STRING. END is where to stop parsing STRING.
    1.60+When PRODUCE-CST is set, the full concrete syntax tree will be produced as
    1.61+opposed to the abstract syntax tree. See 'Named vs Anonymous Nodes':
    1.62+http://tree-sitter.github.io/tree-sitter/using-parsers#named-vs-anonymous-nodes
    1.63+NAME-GENERATOR is a function which converts a string from tree-sitter into a
    1.64+desired name for use in lisp."
    1.65+  (let ((parser (ts-parser-new)))
    1.66+    (when (sb-alien:null-alien parser)
    1.67+      (error 'cant-create-parser))
    1.68+    (unwind-protect (parse-string-with-language language string parser
    1.69+                                                :start start
    1.70+                                                :end end
    1.71+                                                :produce-cst produce-cst
    1.72+                                                :name-generator name-generator)
    1.73+      (ts-parser-delete parser))))
    1.74+
    1.75+(defun parse-string-with-language (language string parser
    1.76+                                   &key (start 0) end produce-cst name-generator)
    1.77+  (unless (ts-parser-set-language parser (language-module language))
    1.78+    (error 'cant-set-language :language language))
    1.79+  (let* ((string-start start)
    1.80+         (string-end (or end (length string)))
    1.81+         ;; TODO: this might need to be +1 if it's actually a c-string for null
    1.82+         (string-length (- string-end string-start))
    1.83+         (string-to-pass (if (plusp string-start)
    1.84+                             (subseq string string-start string-end)
    1.85+                             string))
    1.86+         (tree (ts-parser-parse-string parser string-to-pass string-length)))
    1.87+    (when (sb-alien:null-alien tree)
    1.88+      (error 'cant-parse-string
    1.89+             :string string
    1.90+             :string-start start
    1.91+             :string-end end
    1.92+             :language language))
    1.93+    (unwind-protect (convert-foreign-tree-to-list tree :produce-cst produce-cst
    1.94+                                                       :name-generator name-generator)
    1.95+      (ts-tree-delete tree))))
    1.96+
    1.97+(defun convert-foreign-tree-to-list (tree &key produce-cst name-generator
    1.98+                                     &aux did-visit-children parse-stack)
    1.99+  (with-tree-cursor-pointer (cursor tree)
   1.100+    ;; Closely follows tree-sitter-cli parse
   1.101+    ;; implementation with a modification to
   1.102+    ;; allow for production of the full CST.
   1.103+    (loop
   1.104+      (with-ts-node-pointer (node (ts-tree-cursor-current-node-pointer cursor))
   1.105+        (let ((is-named (or produce-cst (ts-node-is-named-pointer node))))
   1.106+          (cond (did-visit-children
   1.107+                 (when (and is-named (second parse-stack))
   1.108+                   (let ((item (pop parse-stack)))
   1.109+                     (setf (node-children item)
   1.110+                           (nreverse (node-children item)))
   1.111+                     (push item (node-children (first parse-stack)))))
   1.112+                 (cond ((ts-tree-cursor-goto-next-sibling cursor)
   1.113+                        (setf did-visit-children nil))
   1.114+                       ((ts-tree-cursor-goto-parent cursor)
   1.115+                        (setf did-visit-children t))
   1.116+                       (t
   1.117+                        (let ((root (first parse-stack)))
   1.118+                          (setf (node-children root)
   1.119+                                (nreverse (node-children root)))
   1.120+                          (return root)))))
   1.121+                (t
   1.122+                 (when is-named
   1.123+                   (let ((start-point (ts-node-start-point-pointer node))
   1.124+                         (end-point (ts-node-end-point-pointer node))
   1.125+                         (type (funcall name-generator (ts-node-type-pointer node)))
   1.126+                         (field-name-ptr (ts-tree-cursor-current-field-name cursor)))
   1.127+                     (unless (sb-alien:null-alien field-name-ptr)
   1.128+                       ;; TODO
   1.129+                       (let ((field-name (deref field-name-ptr)))
   1.130+                         (setf type (list (funcall name-generator field-name) type))))
   1.131+                     (push (make-node :type type
   1.132+                                      :range (list (list (second start-point) (fourth start-point))
   1.133+                                                   (list (second end-point) (fourth end-point))))
   1.134+                           parse-stack)))
   1.135+                 (setf did-visit-children
   1.136+                       (not (ts-tree-cursor-goto-first-child cursor))))))))))