*** empty log message ***

author: Gerd Moellmann <gerd@gnu.org> 2001-10-01 07:31:59 +0000
committer: Gerd Moellmann <gerd@gnu.org> 2001-10-01 07:31:59 +0000
commit: 12c645037675823286ff13ad4c1b18478e73346d (patch)
tree: a23409defd7d37d17a8e6605840eba93beebd800 /lisp/emacs-lisp/rx.el
parent: 78f2fcaf91aee5237cfabf97e22011c8ce8cbc6e (diff)
1 files changed, 753 insertions, 0 deletions
diff --git a/lisp/emacs-lisp/rx.el b/lisp/emacs-lisp/rx.el
new file mode 100644
index 00000000000..92eea6d00a3
--- /dev/null
+++ b/lisp/emacs-lisp/rx.el
@@ -0,0 +1,753 @@
+;;; rx.el --- sexp notation for regular expressions
+
+;; Copyright (C) 2001 Free Software Foundation, Inc.
+
+;; Author: Gerd Moellmann <gerd@gnu.org>
+;; Maintainer: FSF
+;; Keywords: strings, regexps, extensions
+
+;; This file is part of GNU Emacs.
+
+;; GNU Emacs is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 2, or (at your option)
+;; any later version.
+
+;; GNU Emacs is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GNU Emacs; see the file COPYING.  If not, write to the
+;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+;; Boston, MA 02111-1307, USA.
+
+;;; Commentary:
+
+;; This is another implementation of sexp-form regular expressions.
+;; It was unfortunately written without being aware of the Sregex
+;; package coming with Emacs, but as things stand, Rx completely
+;; covers all regexp features, which Sregex doesn't, doesn't suffer
+;; from the bugs mentioned in the commentary section of Sregex, and
+;; uses a nicer syntax (IMHO, of course :-).
+
+;; Rx translates a sexp notation for regular expressions into the
+;; usual string notation.  The translation can be done at compile-time
+;; by using the `rx' macro.  It can be done at run-time by calling
+;; function `rx-to-string'.  See the documentation of `rx' for a
+;; complete description of the sexp notation.
+;;
+;; Some examples of string regexps and their sexp counterparts:
+;;
+;; "^[a-z]*"
+;; (rx (and line-start (0+ (in "a-z"))))
+;;
+;; "\n[^ \t]"
+;; (rx (and "\n" (not blank))), or
+;; (rx (and "\n" (not (any " \t"))))
+;;
+;; "\\*\\*\\* EOOH \\*\\*\\*\n"
+;; (rx "*** EOOH ***\n")
+;;
+;; "\\<\\(catch\\|finally\\)\\>[^_]"
+;; (rx (and word-start (submatch (or "catch" "finally")) word-end
+;;          (not (any ?_))))
+;;
+;; "[ \t\n]*:\\([^:]+\\|$\\)"
+;; (rx (and (zero-or-more (in " \t\n")) ":"
+;;          (submatch (or line-end (one-or-more (not (any ?:)))))))
+;;
+;; "^content-transfer-encoding:\\(\n?[\t ]\\)*quoted-printable\\(\n?[\t ]\\)*"
+;; (rx (and line-start
+;;          "content-transfer-encoding:"
+;;          (* (and (? ?\n) blank))
+;;          "quoted-printable"
+;;          (* (and (? ?\n) blank))))
+;;
+;; (concat "^\\(?:" something-else "\\)")
+;; (rx (and line-start (eval something-else))), statically or
+;; (rx-to-string `(and line-start ,something-else)), dynamically.
+;;
+;; (regexp-opt '(STRING1 STRING2 ...))
+;; (rx (or STRING1 STRING2 ...)), or in other words, `or' automatically
+;; calls `regexp-opt' as needed.
+;;
+;; "^;;\\s-*\n\\|^\n"
+;; (rx (or (and line-start ";;" (0+ space) ?\n)
+;;         (and line-start ?\n)))
+;;
+;; "\\$[I]d: [^ ]+ \\([^ ]+\\) "
+;; (rx (and "$Id: "
+;;          (1+ (not (in " ")))
+;;          " "
+;;          (submatch (1+ (not (in " "))))
+;;          " "))
+;;
+;; "\\\\\\\\\\[\\w+"
+;; (rx (and ?\\ ?\\ ?\[ (1+ word)))
+;;
+;; etc.
+
+;;; History:
+;; 
+
+;;; Code:
+
+
+(defconst rx-constituents
+  '((and		. (rx-and 1 nil))
+    (or			. (rx-or 1 nil))
+    (not-newline	. ".")
+    ;; The alternation must be wrapped in a shy group: symbol
+    ;; constituents are inserted verbatim by `rx-to-string', so a bare
+    ;; ".\\|\n" would split the surrounding sequence in two.
+    (anything		. "\\(?:.\\|\n\\)")
+    (any		. (rx-any 1 1 rx-check-any))
+    (in			. any)
+    (not		. (rx-not 1 1 rx-check-not))
+    (repeat		. (rx-repeat 2 3))
+    (submatch		. (rx-submatch 1 nil))
+    (group		. submatch)
+    (zero-or-more	. (rx-kleene 1 1))
+    (one-or-more	. (rx-kleene 1 1))
+    (zero-or-one	. (rx-kleene 1 1))
+    (\?			. zero-or-one)
+    (\??		. zero-or-one)
+    (*			. zero-or-more)
+    (*?			. zero-or-more)
+    (0+			. zero-or-more)
+    (+			. one-or-more)
+    (+?			. one-or-more)
+    (1+			. one-or-more)
+    (optional		. zero-or-one)
+    (minimal-match	. (rx-greedy 1 1))
+    (maximal-match	. (rx-greedy 1 1))
+    (line-start		. "^")
+    (line-end		. "$")
+    (string-start	. "\\`")
+    (string-end		. "\\'")
+    (buffer-start	. "\\`")
+    (buffer-end		. "\\'")
+    (point		. "\\=")
+    (word-start		. "\\<")
+    (word-end		. "\\>")
+    (word-boundary	. "\\b")
+    (syntax		. (rx-syntax 1 1))
+    (category		. (rx-category 1 1 rx-check-category))
+    (eval		. (rx-eval 1 1))
+    (regexp		. (rx-regexp 1 1 stringp))
+    (digit		. "[[:digit:]]")
+    (control		. "[[:cntrl:]]")
+    (hex-digit		. "[[:xdigit:]]")
+    (blank		. "[[:blank:]]")
+    (graphic		. "[[:graph:]]")
+    (printing		. "[[:print:]]")
+    (alphanumeric	. "[[:alnum:]]")
+    (letter		. "[[:alpha:]]")
+    (ascii		. "[[:ascii:]]")
+    (nonascii		. "[[:nonascii:]]")
+    (lower		. "[[:lower:]]")
+    (punctuation	. "[[:punct:]]")
+    (space		. "[[:space:]]")
+    (upper		. "[[:upper:]]")
+    (word		. "[[:word:]]"))
+  "Alist of sexp form regexp constituents.
+Each element of the alist has the form (SYMBOL . DEFN).
+SYMBOL is a valid constituent of sexp regular expressions.
+If DEFN is a string, SYMBOL is translated into DEFN verbatim, so DEFN
+must be usable as-is in the middle of a larger regexp.
+If DEFN is a symbol, use the definition of DEFN, recursively.
+Otherwise, DEFN must be a list (FUNCTION MIN-ARGS MAX-ARGS PREDICATE).
+FUNCTION is used to produce code for SYMBOL.  MIN-ARGS and MAX-ARGS
+are the minimum and maximum number of arguments the function-form
+sexp constituent SYMBOL may have in sexp regular expressions.
+MAX-ARGS nil means no limit.  PREDICATE, if specified, means that
+all arguments must satisfy PREDICATE.")
+
+
+(defconst rx-syntax
+  '((whitespace		. ?-)
+    (punctuation	. ?.)
+    (word		. ?w)
+    (symbol		. ?_)
+    (open-parenthesis	. ?\()
+    (close-parenthesis	. ?\))
+    (expression-prefix	. ?\')
+    (string-quote	. ?\")
+    (paired-delimiter	. ?$)
+    (escape		. ?\\)
+    (character-quote	. ?/)
+    (comment-start	. ?<)
+    (comment-end	. ?>)
+    ;; Generic delimiter classes, which have no other name here.
+    (string-delimiter	. ?|)
+    (comment-delimiter	. ?!))
+  "Alist mapping Rx syntax symbols to syntax characters.
+Each entry has the form (SYMBOL . CHAR), where SYMBOL is a valid
+symbol in `(syntax SYMBOL)', and CHAR is the syntax character
+corresponding to SYMBOL, as it would be used with \\s or \\S in
+regular expressions.")
+
+
+(defconst rx-categories
+  '((consonant			. ?0)
+    (base-vowel			. ?1)
+    (upper-diacritical-mark	. ?2)
+    (lower-diacritical-mark	. ?3)
+    (tone-mark			. ?4)
+    (symbol			. ?5)
+    (digit			. ?6)
+    (vowel-modifying-diacritical-mark . ?7)
+    (vowel-sign			. ?8)
+    (semivowel-lower		. ?9)
+    (not-at-end-of-line		. ?<)
+    (not-at-beginning-of-line	. ?>)
+    (alpha-numeric-two-byte	. ?A)
+    (chinese-two-byte		. ?C)
+    ;; Misspelled name kept so that existing `(category chinse-two-byte)'
+    ;; forms continue to work.
+    (chinse-two-byte		. ?C)
+    (greek-two-byte		. ?G)
+    (japanese-hiragana-two-byte . ?H)
+    (indian-two-byte		. ?I)
+    (japanese-katakana-two-byte . ?K)
+    (korean-hangul-two-byte	. ?N)
+    (cyrillic-two-byte		. ?Y)
+    (ascii			. ?a)
+    (arabic			. ?b)
+    (chinese			. ?c)
+    (ethiopic			. ?e)
+    (greek			. ?g)
+    (korean			. ?h)
+    (indian			. ?i)
+    (japanese			. ?j)
+    (japanese-katakana		. ?k)
+    (latin			. ?l)
+    (lao			. ?o)
+    (tibetan			. ?q)
+    (japanese-roman		. ?r)
+    (thai			. ?t)
+    (vietnamese			. ?v)
+    (hebrew			. ?w)
+    (cyrillic			. ?y)
+    (can-break			. ?|))
+  "Alist mapping symbols to category characters.
+Each entry has the form (SYMBOL . CHAR), where SYMBOL is a valid
+symbol in `(category SYMBOL)', and CHAR is the category character
+corresponding to SYMBOL, as it would be used with `\\c' or `\\C' in
+regular expression strings.
+`chinse-two-byte' is a misspelled alias of `chinese-two-byte', kept
+for backward compatibility.")
+
+
+;; `rx-kleene' consults this flag for every repetition operator that is
+;; not explicitly greedy (`*', `+', `?') or non-greedy (`*?', `+?',
+;; `??'); `rx-greedy' let-binds it while translating its argument.
+(defvar rx-greedy-flag t
+  "Non-nil means produce greedy regular expressions for `zero-or-one',
+`zero-or-more', and `one-or-more'.  Dynamically bound.")
+
+
+(defun rx-info (op)
+  "Look up OP in `rx-constituents' and return its final definition.
+The space character (ASCII 32) stands for the symbol `?', and the
+character `?' stands for the symbol `??'; this is how the reader
+sees the operators of `(? ...)' and `(?? ...)' forms.
+Symbol definitions are followed as aliases until a non-symbol
+definition is reached.  Return nil if OP is not a known
+constituent; return OP itself if it is neither a symbol nor one of
+the two characters above."
+  (let ((defn (cond ((eq op ? ) '\?)
+                    ((eq op ??) '\??)
+                    (t op))))
+    ;; Chase alias chains such as `in' -> `any' -> (rx-any ...).
+    (while (and defn (symbolp defn))
+      (setq defn (cdr (assq defn rx-constituents))))
+    defn))
+    
+
+(defun rx-check (form)
+  "Validate the arguments of FORM against the info for its operator.
+The info is obtained with `rx-info' from the car of FORM.  Signal an
+error if FORM has fewer arguments than the minimum, more than the
+maximum, or an argument that fails the type predicate; limits and
+predicate that are nil are not checked.  Return nil otherwise."
+  (let* ((operator (car form))
+         (info (rx-info operator))
+         (count (1- (length form)))
+         (lower (nth 1 info))
+         (upper (nth 2 info))
+         (predicate (nth 3 info)))
+    (if (and lower (< count lower))
+        (error "Rx form `%s' requires at least %d args"
+               operator lower))
+    (if (and upper (> count upper))
+        (error "Rx form `%s' accepts at most %d args"
+               operator upper))
+    (if predicate
+        (let ((args (cdr form)))
+          (while args
+            (or (funcall predicate (car args))
+                (error "Rx form `%s' requires args satisfying `%s'"
+                       operator predicate))
+            (setq args (cdr args)))))))
+
+
+(defun rx-and (form)
+  "Translate FORM, which is `(and FORM1 ...)', into a regexp string.
+Each sub-form is translated with `rx-to-string' and the results are
+concatenated in order."
+  (rx-check form)
+  (apply #'concat (mapcar #'rx-to-string (cdr form))))
+
+
+(defun rx-or (form)
+  "Translate FORM, which is `(or FORM1 ...)', into a regexp string.
+When every alternative is a string, the result comes from
+`regexp-opt'.  Otherwise each alternative is translated with
+`rx-to-string' and the results are joined with \"\\\\|\"."
+  (rx-check form)
+  (let ((alternatives (cdr form))
+        (only-strings t))
+    (let ((tail alternatives))
+      (while tail
+        (if (not (stringp (car tail)))
+            (setq only-strings nil))
+        (setq tail (cdr tail))))
+    (cond (only-strings
+           (regexp-opt alternatives))
+          (t
+           (mapconcat #'rx-to-string alternatives "\\|")))))
+
+
+(defun rx-quote-for-set (string)
+  "Transform STRING for use in a character set.
+If STRING contains a `]' that is not already its first character,
+move it to the front, where a bracket expression treats it
+literally.  If the result starts with a `^' followed by other
+characters, move the `^' to the end so that it does not negate the
+set.  Return the transformed string.
+Only the last `]' in STRING is moved; any others stay in place."
+  ;; Group 2 must capture everything after the `]'.  With the `*'
+  ;; outside the group only the last character would be kept and the
+  ;; others silently dropped from the set.
+  (when (string-match "\\`\\(\\(?:.\\|\n\\)+\\)\\]\\(\\(?:.\\|\n\\)*\\)\\'"
+		      string)
+    (setq string (concat "]" (match-string 1 string)
+			 (match-string 2 string))))
+  (when (string-match "\\`^\\(\\(?:.\\|\n\\)+\\)\\'" string)
+    (setq string (concat (substring string 1) "^")))
+  string)
+
+
+(defun rx-check-any (arg)
+  "Return t if ARG is a valid argument for the Rx form `any'.
+ARG must be a character (an integer) or a non-empty string;
+anything else signals an error."
+  (cond ((integerp arg)
+         t)
+        ((not (stringp arg))
+         (error "Rx `any' requires string or character arg"))
+        ((= (length arg) 0)
+         (error "String arg for Rx `any' must not be empty"))
+        (t
+         t)))
+
+
+(defun rx-any (form)
+  "Parse and produce code from FORM, which is `(any SET)'.
+SET is required and is either a character or a non-empty string.
+A SET of several characters becomes a bracket expression.  A
+single ordinary character is returned as is.  A single character
+that is special in regexps, such as `.' or `*', is returned as a
+one-character bracket expression so that it matches literally.
+
+A lone `^' is still returned unbracketed: it cannot be the only
+member of a bracket expression, and `rx-not' relies on getting the
+bare character to build \"[^^]\"."
+  (rx-check form)
+  (let* ((arg (cadr form))
+         (set (if (integerp arg) (char-to-string arg) arg)))
+    (cond ((/= (length set) 1)
+           (concat "[" (rx-quote-for-set set) "]"))
+          ;; Ordinary characters need no protection.  `^' is the
+          ;; documented exception described above.
+          ((or (string= set "^")
+               (string= set (regexp-quote set)))
+           set)
+          ;; Special characters lose their meaning inside brackets.
+          ;; The result starts with `[', so `rx-not' can still negate
+          ;; it.
+          (t
+           (concat "[" set "]")))))
+
+
+(defun rx-check-not (form)
+  "Check the argument FORM of a `(not FORM)' and return t if it is valid.
+FORM may be one of the character-class symbols, the symbol
+`word-boundary', or a list whose car is `not', `any', `in',
+`syntax' or `category'.  Anything else signals an error."
+  (unless (or (memq form
+		    '(digit control hex-digit blank graphic printing
+			    alphanumeric letter ascii nonascii lower
+			    punctuation space upper word
+			    ;; `rx-not' turns "\\b" into "\\B".
+			    word-boundary))
+	      (and (consp form)
+		   (memq (car form) '(not any in syntax category))))
+    (error "Rx `not' syntax error: %s" form))
+  t)
+
+
+(defun rx-not (form)
+  "Parse and produce code from FORM.  FORM is `(not ...)'.
+The argument is translated without an enclosing group and the
+resulting regexp is negated textually: a bracket expression gains or
+loses its leading `^', and `\\s', `\\c' and `\\b' are swapped with
+their upper-case counterparts.  Any other result is taken to be a
+single character and wrapped in a negated bracket expression."
+  (rx-check form)
+  (let ((result (rx-to-string (cadr form) 'no-group)))
+    (cond ((string-match "\\`\\[^" result)
+	   (if (= (length result) 4)
+	       ;; Negating \"[^C]\" yields the single character C.  If C
+	       ;; is special in regexps, keep it inside brackets so that
+	       ;; it still matches literally.  `^' cannot stand alone in
+	       ;; a bracket expression, so it is returned bare.
+	       (let ((char (substring result 2 3)))
+		 (if (or (string= char "^")
+			 (string= char (regexp-quote char)))
+		     char
+		   (concat "[" char "]")))
+	     (concat "[" (substring result 2))))
+	  ((string-match "\\`\\[" result)
+	   (concat "[^" (substring result 1)))
+	  ((string-match "\\`\\\\s." result)
+	   (concat "\\S" (substring result 2)))
+	  ((string-match "\\`\\\\S." result)
+	   (concat "\\s" (substring result 2)))
+	  ((string-match "\\`\\\\c." result)
+	   (concat "\\C" (substring result 2)))
+	  ((string-match "\\`\\\\C." result)
+	   (concat "\\c" (substring result 2)))
+	  ((string-match "\\`\\\\B" result)
+	   (concat "\\b" (substring result 2)))
+	  ((string-match "\\`\\\\b" result)
+	   (concat "\\B" (substring result 2)))
+	  (t
+	   (concat "[^" result "]")))))
+
+
+(defun rx-repeat (form)
+  "Parse and produce code from FORM.
+FORM is either `(repeat N FORM1)' or `(repeat N M FORM1)'.
+In the first form N must be a positive integer.  In the second, N
+and M must be non-negative integers with N not greater than M.
+Signal an error otherwise.
+FORM1 is wrapped in a shy group so that the repetition applies to
+all of it rather than to its last character only."
+  (rx-check form)
+  (cond ((= (length form) 3)
+	 (unless (and (integerp (nth 1 form))
+		      (> (nth 1 form) 0))
+	   (error "Rx `repeat' requires positive integer first arg"))
+	 (format "\\(?:%s\\)\\{%d\\}"
+		 (rx-to-string (nth 2 form) 'no-group)
+		 (nth 1 form)))
+	((or (not (integerp (nth 2 form)))
+	     (< (nth 2 form) 0)
+	     (not (integerp (nth 1 form)))
+	     (< (nth 1 form) 0)
+	     (< (nth 2 form) (nth 1 form)))
+	 (error "Rx `repeat' range error"))
+	(t
+	 (format "\\(?:%s\\)\\{%d,%d\\}"
+		 (rx-to-string (nth 3 form) 'no-group)
+		 (nth 1 form) (nth 2 form)))))
+
+
+(defun rx-submatch (form)
+  "Parse and produce code from FORM, which is `(submatch ...)'.
+The sub-forms are translated with `rx-to-string', concatenated, and
+enclosed in a capturing group."
+  ;; Validate like every other handler, so that the argument limits
+  ;; declared for `submatch' in `rx-constituents' are enforced.
+  (rx-check form)
+  (concat "\\(" (mapconcat #'rx-to-string (cdr form) nil) "\\)"))
+
+
+(defun rx-kleene (form)
+  "Translate a repetition FORM into a regexp string.
+FORM is `(OP FORM1)', where OP is `zero-or-one', `zero-or-more',
+`one-or-more' or one of their aliases.
+The operators `*', `+' and `?' always give a greedy regexp, and
+`*?', `+?' and `??' always give a non-greedy one.  For every other
+operator the result is greedy if `rx-greedy-flag' is non-nil.
+FORM1 is always enclosed in a shy group."
+  (rx-check form)
+  (let* ((operator (car form))
+         ;; In the next two lists the reader sees `? ' as the space
+         ;; character and `??' as the character `?'.  That is also how
+         ;; it reads the operators of `(? X)' and `(?? X)'.
+         (laziness (cond ((memq operator '(* + ? )) "")
+                         ((memq operator '(*? +? ??)) "?")
+                         (rx-greedy-flag "")
+                         (t "?")))
+         (repetition (cond ((memq operator '(* *? 0+ zero-or-more)) "*")
+                           ((memq operator '(+ +? 1+ one-or-more)) "+")
+                           (t "?")))
+         (body (rx-to-string (cadr form) 'no-group)))
+    (format "\\(?:%s\\)%s%s" body repetition laziness)))
+
+
+(defun rx-syntax (form)
+  "Translate FORM, which is `(syntax SYMBOL)', into a regexp string.
+SYMBOL is looked up in the alist `rx-syntax'; the result is \"\\\\s\"
+followed by the syntax character found there.  Signal an error if
+SYMBOL is not in the alist."
+  (rx-check form)
+  (let* ((name (cadr form))
+         (entry (assq name rx-syntax)))
+    (if entry
+        (format "\\s%c" (cdr entry))
+      (error "Unknown rx syntax `%s'" name))))
+
+
+(defun rx-check-category (form)
+  "Return t if FORM is a valid argument of `(category FORM)'.
+FORM must be an integer (a category character) or a symbol listed
+in `rx-categories'; anything else signals an error."
+  (if (or (integerp form)
+          (cdr (assq form rx-categories)))
+      t
+    (error "Unknown category `%s'" form)))
+			    
+
+(defun rx-category (form)
+  "Translate FORM, which is `(category CATEGORY)', into a regexp string.
+CATEGORY is either a category character, used as is, or a symbol
+that is looked up in `rx-categories'.  The result is \"\\\\c\"
+followed by the category character."
+  (rx-check form)
+  (let ((category (cadr form)))
+    (format "\\c%c"
+            (if (integerp category)
+                category
+              (cdr (assq category rx-categories))))))
+
+
+(defun rx-eval (form)
+  "Translate FORM, which is `(eval EXPR)', into a regexp string.
+EXPR is evaluated with `eval' and its value is then translated by
+`rx-to-string' like any other sexp regexp."
+  (rx-check form)
+  ;; EXPR comes straight from the sexp regexp and is evaluated as Lisp
+  ;; code here.
+  (let ((value (eval (cadr form))))
+    (rx-to-string value)))
+
+
+(defun rx-greedy (form)
+  "Translate FORM with an explicit greediness setting.
+FORM is `(minimal-match FORM1)' or `(maximal-match FORM1)'.
+`rx-greedy-flag' is bound to t for `maximal-match' and to nil
+otherwise while FORM1 is translated with `rx-to-string', so the
+repetition operators that consult the flag come out greedy or
+non-greedy accordingly."
+  (rx-check form)
+  (let ((rx-greedy-flag (if (eq (car form) 'maximal-match) t nil)))
+    (rx-to-string (cadr form))))
+
+
+(defun rx-regexp (form)
+  "Translate FORM, which is `(regexp STRING)', into a regexp string.
+STRING is already in string notation; it is inserted unchanged,
+enclosed in a shy group."
+  (rx-check form)
+  (let ((regexp (cadr form)))
+    (concat "\\(?:" regexp "\\)")))
+
+
+;;;###autoload
+(defun rx-to-string (form &optional no-group)
+  "Parse and produce code for regular expression FORM.
+FORM is a regular expression in sexp form.
+NO-GROUP non-nil means don't put shy groups around the result.
+
+A string or character FORM is quoted with `regexp-quote'.  A
+symbol FORM is looked up with `rx-info'.  A list FORM is handed to
+the handler function registered for its car; the result is then
+enclosed in a shy group unless NO-GROUP is non-nil or the handler
+already returns exactly one group.  Signal an error for unknown or
+malformed forms."
+  (cond ((stringp form)
+	 (regexp-quote form))
+	((integerp form)
+	 (regexp-quote (char-to-string form)))
+	((symbolp form)
+	 (let ((info (rx-info form)))
+	   (cond ((stringp info)
+		  info)
+		 ((null info)
+		  (error "Unknown Rx form `%s'" form))
+		 (t 
+		  (funcall (nth 0 info) form)))))
+	((consp form)
+	 (let ((info (rx-info (car form))))
+	   (unless (consp info)
+	     (error "Unknown Rx form `%s'" (car form)))
+	   (let ((result (funcall (nth 0 info) form)))
+	     ;; Decide by handler, not by whether RESULT happens to start
+	     ;; with \"\\\\(\".  A result such as \"\\\\(a\\\\)\\\\|b\"
+	     ;; starts with a group but still needs one around the whole
+	     ;; alternation.  Only `rx-submatch' and `rx-regexp' are
+	     ;; known to return a single complete group.
+	     (if (or no-group
+		     (memq (nth 0 info) '(rx-submatch rx-regexp)))
+		 result
+	       (concat "\\(?:" result "\\)")))))
+	(t
+	 (error "Rx syntax error at `%s'" form))))
+
+
+;;;###autoload
+(defmacro rx (regexp)
+  "Translate a regular expression REGEXP in sexp form to a regexp string.
+See also `rx-to-string' for how to do such a translation at run-time.
+
+REGEXP is not evaluated.  The macro expands into a call to
+`rx-to-string' on the quoted form.
+
+The following are valid subforms of regular expressions in sexp
+notation.
+
+STRING
+     matches string STRING literally.
+
+CHAR
+     matches character CHAR literally.
+
+`not-newline'
+     matches any character except a newline.
+
+`anything'
+     matches any character, including newline.
+
+`(any SET)'
+     matches any character in SET.  SET may be a character or string.
+     Ranges of characters can be specified as `A-Z' in strings.
+
+`(in SET)'
+     like `any'.
+
+`(not (any SET))'
+     matches any character not in SET
+
+`line-start'
+     matches the empty string, but only at the beginning of a line
+     in the text being matched
+
+`line-end'
+     is similar to `line-start' but matches only at the end of a line
+
+`string-start'
+     matches the empty string, but only at the beginning of the
+     string being matched against.
+
+`string-end'
+     matches the empty string, but only at the end of the
+     string being matched against.
+
+`buffer-start'
+     matches the empty string, but only at the beginning of the
+     buffer being matched against.
+
+`buffer-end'
+     matches the empty string, but only at the end of the
+     buffer being matched against.
+
+`point'
+     matches the empty string, but only at point.
+
+`word-start'
+     matches the empty string, but only at the beginning of a word.
+
+`word-end'
+     matches the empty string, but only at the end of a word.
+
+`word-boundary'
+     matches the empty string, but only at the beginning or end of a
+     word.
+
+`(not word-boundary)'
+     matches the empty string, but not at the beginning or end of a
+     word.
+
+`digit'
+     matches 0 through 9.
+
+`control'
+     matches ASCII control characters.
+
+`hex-digit'
+     matches 0 through 9, a through f and A through F.
+
+`blank'
+     matches space and tab only.
+
+`graphic'
+     matches graphic characters--everything except ASCII control chars,
+     space, and DEL.
+
+`printing'
+     matches printing characters--everything except ASCII control chars
+     and DEL.
+
+`alphanumeric'
+     matches letters and digits.  (But at present, for multibyte characters,
+     it matches anything that has word syntax.)
+
+`letter'
+     matches letters.  (But at present, for multibyte characters,
+     it matches anything that has word syntax.)
+
+`ascii'
+     matches ASCII (unibyte) characters.
+
+`nonascii'
+     matches non-ASCII (multibyte) characters.
+
+`lower'
+     matches anything lower-case.
+
+`upper'
+     matches anything upper-case.
+
+`punctuation'
+     matches punctuation.  (But at present, for multibyte characters,
+     it matches anything that has non-word syntax.)
+
+`space'
+     matches anything that has whitespace syntax.
+
+`word'
+     matches anything that has word syntax.
+
+`(syntax SYNTAX)'
+     matches a character with syntax SYNTAX.  SYNTAX must be one
+     of the following symbols.
+
+     `whitespace'		(\\s- in string notation)
+     `punctuation'		(\\s.)
+     `word'			(\\sw)
+     `symbol'			(\\s_)
+     `open-parenthesis'		(\\s()
+     `close-parenthesis'	(\\s))
+     `expression-prefix'	(\\s')
+     `string-quote'		(\\s\")
+     `paired-delimiter'		(\\s$)
+     `escape'			(\\s\\)
+     `character-quote'		(\\s/)
+     `comment-start'		(\\s<)
+     `comment-end'		(\\s>)
+
+`(not (syntax SYNTAX))'
+     matches a character that does not have syntax SYNTAX.
+
+`(category CATEGORY)'
+     matches a character with category CATEGORY.  CATEGORY must be
+     either a character to use for C, or one of the following symbols.
+
+     `consonant'			(\\c0 in string notation)
+     `base-vowel'			(\\c1)
+     `upper-diacritical-mark'		(\\c2)
+     `lower-diacritical-mark'		(\\c3)
+     `tone-mark'		        (\\c4)
+     `symbol'			        (\\c5)
+     `digit'			        (\\c6)
+     `vowel-modifying-diacritical-mark'	(\\c7)
+     `vowel-sign'			(\\c8)
+     `semivowel-lower'			(\\c9)
+     `not-at-end-of-line'		(\\c<)
+     `not-at-beginning-of-line'		(\\c>)
+     `alpha-numeric-two-byte'		(\\cA)
+     `chinse-two-byte'			(\\cC)
+     `greek-two-byte'			(\\cG)
+     `japanese-hiragana-two-byte'	(\\cH)
+     `indian-two-byte'			(\\cI)
+     `japanese-katakana-two-byte'	(\\cK)
+     `korean-hangul-two-byte'		(\\cN)
+     `cyrillic-two-byte'		(\\cY)
+     `ascii'				(\\ca)
+     `arabic'				(\\cb)
+     `chinese'				(\\cc)
+     `ethiopic'				(\\ce)
+     `greek'				(\\cg)
+     `korean'				(\\ch)
+     `indian'				(\\ci)
+     `japanese'				(\\cj)
+     `japanese-katakana'		(\\ck)
+     `latin'				(\\cl)
+     `lao'				(\\co)
+     `tibetan'				(\\cq)
+     `japanese-roman'			(\\cr)
+     `thai'				(\\ct)
+     `vietnamese'			(\\cv)
+     `hebrew'				(\\cw)
+     `cyrillic'				(\\cy)
+     `can-break'			(\\c|)
+
+`(not (category CATEGORY))'
+     matches a character that does not have category CATEGORY.
+
+`(and SEXP1 SEXP2 ...)'
+     matches what SEXP1 matches, followed by what SEXP2 matches, etc.
+
+`(submatch SEXP1 SEXP2 ...)'
+     like `and', but makes the match accessible with `match-end',
+     `match-beginning', and `match-string'.
+
+`(group SEXP1 SEXP2 ...)'
+     another name for `submatch'.
+
+`(or SEXP1 SEXP2 ...)'
+     matches anything that matches SEXP1 or SEXP2, etc.  If all
+     args are strings, use `regexp-opt' to optimize the resulting
+     regular expression.
+
+`(minimal-match SEXP)'
+     produce a non-greedy regexp for SEXP.  Normally, regexps matching
+     zero or more occurrences of something are \"greedy\" in that they
+     match as much as they can, as long as the overall regexp can
+     still match.  A non-greedy regexp matches as little as possible.
+
+`(maximal-match SEXP)'
+     produce a greedy regexp for SEXP.   This is the default.
+
+`(zero-or-more SEXP)'
+     matches zero or more occurrences of what SEXP matches.
+
+`(0+ SEXP)'
+     like `zero-or-more'.
+
+`(* SEXP)'
+     like `zero-or-more', but always produces a greedy regexp.
+
+`(*? SEXP)'
+     like `zero-or-more', but always produces a non-greedy regexp.
+
+`(one-or-more SEXP)'
+     matches one or more occurrences of what SEXP matches.
+
+`(1+ SEXP)'
+     like `one-or-more'.
+
+`(+ SEXP)'
+     like `one-or-more', but always produces a greedy regexp.
+
+`(+? SEXP)'
+     like `one-or-more', but always produces a non-greedy regexp.
+
+`(zero-or-one SEXP)'
+     matches zero or one occurrences of what SEXP matches.
+
+`(optional SEXP)'
+     like `zero-or-one'.
+
+`(? SEXP)'
+     like `zero-or-one', but always produces a greedy regexp.
+
+`(?? SEXP)'
+     like `zero-or-one', but always produces a non-greedy regexp.
+
+`(repeat N SEXP)'
+     matches N occurrences of what SEXP matches.
+
+`(repeat N M SEXP)'
+     matches N to M occurrences of what SEXP matches.
+
+`(eval FORM)'
+      evaluate FORM and insert result.   If result is a string,
+      `regexp-quote' it.
+
+`(regexp REGEXP)'
+      include REGEXP in string notation in the result."
+
+  `(rx-to-string ',regexp))
+
+
+;; Record `rx' as a provided feature.
+(provide 'rx)
+
+;;; rx.el ends here
author	Gerd Moellmann <gerd@gnu.org>	2001-10-01 07:31:59 +0000
committer	Gerd Moellmann <gerd@gnu.org>	2001-10-01 07:31:59 +0000
commit	12c645037675823286ff13ad4c1b18478e73346d (patch)
tree	a23409defd7d37d17a8e6605840eba93beebd800 /lisp/emacs-lisp/rx.el
parent	78f2fcaf91aee5237cfabf97e22011c8ce8cbc6e (diff)