1.1--- /dev/null Thu Jan 01 00:00:00 1970 +0000
1.2+++ b/lisp/ffi/tree-sitter/api.lisp Thu Dec 28 22:03:26 2023 -0500
1.3@@ -0,0 +1,133 @@
1.4+;;; ffi/tree-sitter/api.lisp --- High-level API for Tree-sitter library
1.5+
1.6+;; High-level Tree-sitter API
1.7+
1.8+;;; Code:
1.9+(in-package :tree-sitter)
1.10+
1.11+(defstruct (node (:type list))
1.12+ type range children)
1.13+
1.14+(defun make-lisp-name (string)
1.15+ (intern (string-upcase (substitute #\- #\_ string))
1.16+ (load-time-value (find-package "KEYWORD"))))
1.17+
1.18+(define-condition tree-sitter-error (error)
1.19+ ())
1.20+
1.21+(define-condition create-parser-error (tree-sitter-error)
1.22+ ())
1.23+
1.24+(define-condition set-language-error (tree-sitter-error)
1.25+ ((language :initarg :language :reader tree-sitter-error-language)))
1.26+
1.27+(define-condition parse-string-error (tree-sitter-error)
1.28+ ((string :initarg :string :reader tree-sitter-error-string)
1.29+ (string-start :initarg :string-start :reader tree-sitter-error-string-start)
1.30+ (string-end :initarg :string-end :reader tree-sitter-error-string-end)
1.31+ (language :initarg :language :reader tree-sitter-error-language)))
1.32+
1.33+(define-condition null-node-pointer (tree-sitter-error)
1.34+ ())
1.35+
1.36+(define-condition null-tree-cursor-pointer (tree-sitter-error)
1.37+ ())
1.38+
1.39+;; util
1.40+(defmacro with-ts-node-pointer ((var node) &body forms)
1.41+ `(let ((,var ,node))
1.42+ (when (sb-alien:null-alien ,var)
1.43+ (error 'null-node-pointer))
1.44+ (unwind-protect
1.45+ (progn ,@forms)
1.46+ (sb-alien:free-alien ,var))))
1.47+
1.48+(defmacro with-tree-cursor-pointer ((var tree) &body forms &aux (node (gensym)))
1.49+ `(with-ts-node-pointer (,node (ts-tree-root-node-pointer ,tree))
1.50+ (let ((,var (ts-tree-cursor-new-pointer ,node)))
1.51+ (when (sb-alien:null-alien ,var)
1.52+ (error 'null-tree-cursor-pointer))
1.53+ (unwind-protect
1.54+ (progn ,@forms)
1.55+ (ts-tree-cursor-delete ,var)))))
1.56+
1.57+(defun parse-string (language string &key (start 0) end produce-cst (name-generator #'make-lisp-name))
1.58+ "Parse a STRING that represents LANGUAGE code using tree-sitter. START is
1.59+where to start parsing STRING. END is where to stop parsing STRING.
1.60+When PRODUCE-CST is set, the full concrete syntax tree will be produced as
1.61+opposed to the abstract syntax tree. See 'Named vs Anonymous Nodes':
1.62+http://tree-sitter.github.io/tree-sitter/using-parsers#named-vs-anonymous-nodes
1.63+NAME-GENERATOR is a function which converts a string from tree-sitter into a
1.64+desired name for use in lisp."
1.65+ (let ((parser (ts-parser-new)))
1.66+ (when (sb-alien:null-alien parser)
1.67+ (error 'cant-create-parser))
1.68+ (unwind-protect (parse-string-with-language language string parser
1.69+ :start start
1.70+ :end end
1.71+ :produce-cst produce-cst
1.72+ :name-generator name-generator)
1.73+ (ts-parser-delete parser))))
1.74+
1.75+(defun parse-string-with-language (language string parser
1.76+ &key (start 0) end produce-cst name-generator)
1.77+ (unless (ts-parser-set-language parser (language-module language))
1.78+ (error 'cant-set-language :language language))
1.79+ (let* ((string-start start)
1.80+ (string-end (or end (length string)))
1.81+ ;; TODO: this might need to be +1 if it's actually a c-string for null
1.82+ (string-length (- string-end string-start))
1.83+ (string-to-pass (if (plusp string-start)
1.84+ (subseq string string-start string-end)
1.85+ string))
1.86+ (tree (ts-parser-parse-string parser string-to-pass string-length)))
1.87+ (when (sb-alien:null-alien tree)
1.88+ (error 'cant-parse-string
1.89+ :string string
1.90+ :string-start start
1.91+ :string-end end
1.92+ :language language))
1.93+ (unwind-protect (convert-foreign-tree-to-list tree :produce-cst produce-cst
1.94+ :name-generator name-generator)
1.95+ (ts-tree-delete tree))))
1.96+
1.97+(defun convert-foreign-tree-to-list (tree &key produce-cst name-generator
1.98+ &aux did-visit-children parse-stack)
1.99+ (with-tree-cursor-pointer (cursor tree)
1.100+ ;; Closely follows tree-sitter-cli parse
1.101+ ;; implementation with a modification to
1.102+ ;; allow for production of the full CST.
1.103+ (loop
1.104+ (with-ts-node-pointer (node (ts-tree-cursor-current-node-pointer cursor))
1.105+ (let ((is-named (or produce-cst (ts-node-is-named-pointer node))))
1.106+ (cond (did-visit-children
1.107+ (when (and is-named (second parse-stack))
1.108+ (let ((item (pop parse-stack)))
1.109+ (setf (node-children item)
1.110+ (nreverse (node-children item)))
1.111+ (push item (node-children (first parse-stack)))))
1.112+ (cond ((ts-tree-cursor-goto-next-sibling cursor)
1.113+ (setf did-visit-children nil))
1.114+ ((ts-tree-cursor-goto-parent cursor)
1.115+ (setf did-visit-children t))
1.116+ (t
1.117+ (let ((root (first parse-stack)))
1.118+ (setf (node-children root)
1.119+ (nreverse (node-children root)))
1.120+ (return root)))))
1.121+ (t
1.122+ (when is-named
1.123+ (let ((start-point (ts-node-start-point-pointer node))
1.124+ (end-point (ts-node-end-point-pointer node))
1.125+ (type (funcall name-generator (ts-node-type-pointer node)))
1.126+ (field-name-ptr (ts-tree-cursor-current-field-name cursor)))
1.127+ (unless (sb-alien:null-alien field-name-ptr)
1.128+ ;; TODO
1.129+ (let ((field-name (deref field-name-ptr)))
1.130+ (setf type (list (funcall name-generator field-name) type))))
1.131+ (push (make-node :type type
1.132+ :range (list (list (second start-point) (fourth start-point))
1.133+ (list (second end-point) (fourth end-point))))
1.134+ parse-stack)))
1.135+ (setf did-visit-children
1.136+ (not (ts-tree-cursor-goto-first-child cursor))))))))))