summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Robert Smith <robert@rigetti.com>, 2018-08-04 19:58:24 -0700
committer: Stas Boukarev <stassats@gmail.com>, 2018-08-05 15:15:24 +0300
commit: 211563241b92dc518e179c461326a1bef2ba835f (patch)
tree: 8e0b30430317aa023d003960d71c5b9eca251357
parent: d01bbf70cdf1f15ee3734fe80572276caa482e77 (diff)
add internal control *LOOK-AHEAD-FOR-SUFFIX* (tag: v2.1.0)
This allows better performance for some scanning use-cases. We also bump the minor version to 2.1.0.
-rw-r--r--  api.lisp         7
-rw-r--r--  cl-ppcre.asd     2
-rw-r--r--  docs/index.html  26
-rw-r--r--  packages.lisp    1
4 files changed, 34 insertions, 2 deletions
diff --git a/api.lisp b/api.lisp
index 4c11c45..f440361 100644
--- a/api.lisp
+++ b/api.lisp
@@ -31,6 +31,10 @@
(in-package :cl-ppcre)
+(defvar *look-ahead-for-suffix* t
+ "Controls whether scanners will optimistically look ahead for a
+ constant suffix of a regular expression, if there is one.")
+
(defgeneric create-scanner (regex &key case-insensitive-mode
multi-line-mode
single-line-mode
@@ -120,7 +124,8 @@ modify its first argument \(but only if it's a parse tree)."))
(end-string (end-string regex))
;; if we found a non-zero-length end-string we create an
;; efficient search function for it
- (end-string-test (and end-string
+ (end-string-test (and *look-ahead-for-suffix*
+ end-string
(plusp (len end-string))
(if (= 1 (len end-string))
(create-char-searcher
diff --git a/cl-ppcre.asd b/cl-ppcre.asd
index b1e3547..f1c83af 100644
--- a/cl-ppcre.asd
+++ b/cl-ppcre.asd
@@ -37,7 +37,7 @@
(in-package :cl-ppcre-asd)
(defsystem :cl-ppcre
- :version "2.0.11"
+ :version "2.1.0"
:description "Perl-compatible regular expression library"
:author "Dr. Edi Weitz"
:license "BSD"
diff --git a/docs/index.html b/docs/index.html
index 4cd0d91..6da6a15 100644
--- a/docs/index.html
+++ b/docs/index.html
@@ -103,6 +103,7 @@ href="http://weitz.de/regex-coach/">The Regex Coach</a>.
<li><a href="#*optimize-char-classes*"><code>*optimize-char-classes*</code></a>
<li><a href="#*allow-quoting*"><code>*allow-quoting*</code></a>
<li><a href="#*allow-named-registers*"><code>*allow-named-registers*</code></a>
+ <li><a href="#*look-ahead-for-suffix*"><code>*look-ahead-for-suffix*</code></a>
</ol>
<li><a href="#misc">Miscellaneous</a>
<ol>
@@ -1391,6 +1392,31 @@ to which value <code>*ALLOW-NAMED-REGISTERS*</code> is bound at that
time.</blockquote>
</blockquote>
+<p><br>[Special variable]
+<br><a class=none name="*look-ahead-for-suffix*"><b>*look-ahead-for-suffix*</b></a>
+
+<blockquote><br>Given a regular expression which has a constant
+suffix, such as <code>(a|b)+x</code> whose constant suffix
+is <code>x</code>, the scanners created
+by <a href="#create-scanner"><code>CREATE-SCANNER</code></a> will
+attempt to optimize by searching for the position of the suffix prior
+to performing the full match. In many cases, this is an optimization,
+especially when backtracking is involved on small strings. However, in
+other cases, such as incremental parsing of a very large string, this
+can cause a degradation in performance, because the entire string is
+searched for the suffix before an otherwise easy prefix match failure
+can occur. The variable <code>*LOOK-AHEAD-FOR-SUFFIX*</code>, whose
+default is <code>T</code>, can be used to selectively control this
+behavior.
+<p>
+Note: Due to the nature of <a href="http://www.lispworks.com/documentation/HyperSpec/Body/s_ld_tim.htm"><code>LOAD-TIME-VALUE</code></a> and the <a
+href="#compiler-macro">compiler macro for <code>SCAN</code> and other functions</a>, some
+scanners might be created in a <a
+href="http://www.lispworks.com/documentation/HyperSpec/Body/26_glo_n.htm#null_lexical_environment">null
+lexical environment</a> at load time or at compile time so be careful
+to which value <code>*LOOK-AHEAD-FOR-SUFFIX*</code> is bound at that
+time.</blockquote>
+
<h4><a name="misc" class=none>Miscellaneous</a></h4>
<p><br>[Function]
diff --git a/packages.lisp b/packages.lisp
index bfb10d1..74faa44 100644
--- a/packages.lisp
+++ b/packages.lisp
@@ -59,6 +59,7 @@
:*allow-named-registers*
:*optimize-char-classes*
:*property-resolver*
+ :*look-ahead-for-suffix*
:ppcre-error
:ppcre-invocation-error
:ppcre-syntax-error