From 211563241b92dc518e179c461326a1bef2ba835f Mon Sep 17 00:00:00 2001 From: Robert Smith Date: Sat, 4 Aug 2018 19:58:24 -0700 Subject: add internal control *LOOK-AHEAD-FOR-SUFFIX* This allows better performance for some scanning use-cases. We also bump the minor version to 2.1.0. --- api.lisp | 7 ++++++- cl-ppcre.asd | 2 +- docs/index.html | 26 ++++++++++++++++++++++++++ packages.lisp | 1 + 4 files changed, 34 insertions(+), 2 deletions(-) diff --git a/api.lisp b/api.lisp index 4c11c45..f440361 100644 --- a/api.lisp +++ b/api.lisp @@ -31,6 +31,10 @@ (in-package :cl-ppcre) +(defvar *look-ahead-for-suffix* t + "Controls whether scanners will optimistically look ahead for a + constant suffix of a regular expression, if there is one.") + (defgeneric create-scanner (regex &key case-insensitive-mode multi-line-mode single-line-mode @@ -120,7 +124,8 @@ modify its first argument \(but only if it's a parse tree).")) (end-string (end-string regex)) ;; if we found a non-zero-length end-string we create an ;; efficient search function for it - (end-string-test (and end-string + (end-string-test (and *look-ahead-for-suffix* + end-string (plusp (len end-string)) (if (= 1 (len end-string)) (create-char-searcher diff --git a/cl-ppcre.asd b/cl-ppcre.asd index b1e3547..f1c83af 100644 --- a/cl-ppcre.asd +++ b/cl-ppcre.asd @@ -37,7 +37,7 @@ (in-package :cl-ppcre-asd) (defsystem :cl-ppcre - :version "2.0.11" + :version "2.1.0" :description "Perl-compatible regular expression library" :author "Dr. Edi Weitz" :license "BSD" diff --git a/docs/index.html b/docs/index.html index 4cd0d91..6da6a15 100644 --- a/docs/index.html +++ b/docs/index.html @@ -103,6 +103,7 @@ href="http://weitz.de/regex-coach/">The Regex Coach.
  • *optimize-char-classes*
  • *allow-quoting*
  • *allow-named-registers* +
  • *look-ahead-for-suffix*
  • Miscellaneous
      @@ -1391,6 +1392,31 @@ to which value *ALLOW-NAMED-REGISTERS* is bound at that time. +


      [Special variable] +
      *look-ahead-for-suffix* + +


      Given a regular expression which has a constant +suffix, such as (a|b)+x whose constant suffix +is x, the scanners created +by CREATE-SCANNER will +attempt to optimize by searching for the position of the suffix prior +to performing the full match. In many cases, this improves performance, +especially when backtracking is involved on small strings. However, in +other cases, such as incremental parsing of a very large string, this +can cause a degradation in performance, because the entire string is +searched for the suffix before an otherwise easy prefix match failure +can occur. The variable *LOOK-AHEAD-FOR-SUFFIX*, whose +default is T, can be used to selectively control this +behavior.

      +Note: Due to the nature of LOAD-TIME-VALUE and the compiler macro for SCAN and other functions, some +scanners might be created in a null +lexical environment at load time or at compile time, so be careful +about which value *LOOK-AHEAD-FOR-SUFFIX* is bound to at that +time.

      +

      Miscellaneous


      [Function] diff --git a/packages.lisp b/packages.lisp index bfb10d1..74faa44 100644 --- a/packages.lisp +++ b/packages.lisp @@ -59,6 +59,7 @@ :*allow-named-registers* :*optimize-char-classes* :*property-resolver* + :*look-ahead-for-suffix* :ppcre-error :ppcre-invocation-error :ppcre-syntax-error -- cgit v1.2.3-70-g09d2