changelog shortlog graph tags branches changeset files revisions annotate raw help

Mercurial > core / lisp/lib/net/codec/punycode.lisp

changeset 698: 96958d3eb5b0
parent: 524dfb768c7a
author: Richard Westhaver <ellis@rwest.io>
date: Fri, 04 Oct 2024 22:04:59 -0400
permissions: -rw-r--r--
description: fixes
1 ;;; lib/net/codec/punycode.lisp --- RFC 3492 Punycode strings
2 
3 ;; This library was written by Yukari Hafner <shinmera@tymoon.eu>: https://github.com/Shinmera/punycode.git
4 
5 ;;; Code:
6 (in-package :net/codec/punycode)
7 
8 (defconstant INITIAL-N #x80)
9 (defconstant INITIAL-BIAS 72)
10 (defconstant BASE 36)
11 (defconstant TMAX 26)
12 (defconstant TMIN 1)
13 (defconstant SKEW 38)
14 (defconstant DAMP 700)
15 
16 (defun encode-digit (code)
17  (code-char (+ code (if (< code 26) 97 22))))
18 
19 (defun adapt (delta num-points first-time-p)
20  (setf delta (if first-time-p
21  (truncate delta DAMP)
22  (ash delta -1)))
23  (incf delta (truncate delta num-points))
24  (loop for k from 0 by BASE
25  while (< (ash (* TMAX (- BASE TMIN)) -1) delta)
26  do (setf delta (truncate delta (- BASE TMIN)))
27  finally (return (+ k (truncate (* delta (+ 1 (- BASE TMIN))) (+ delta SKEW))))))
28 
29 (defmacro with-stream (stream-ish &body body)
30  (let ((thunk (gensym "THUNK")))
31  `(flet ((,thunk (,stream-ish)
32  ,@body))
33  (etypecase out
34  (null
35  (with-output-to-string (,stream-ish)
36  (,thunk ,stream-ish)))
37  ((eql T)
38  (,thunk *standard-output*))
39  (stream
40  (,thunk ,stream-ish))))))
41 
42 (defun encode-punycode (string &optional out)
43  (with-stream out
44  (let ((n INITIAL-N)
45  (bias INITIAL-BIAS)
46  (delta 0)
47  (basic 0))
48  (loop for i from 0 below (length string)
49  for char = (char string i)
50  for code = (char-code char)
51  do (when (< code 128)
52  (write-char char out)
53  (incf basic)))
54  (unless (= 0 basic)
55  (write-char #\- out))
56  (loop with handled = basic
57  for m = most-positive-fixnum
58  for handled+1 = (1+ handled)
59  while (< handled (length string))
60  do (loop for char across string
61  for code = (char-code char)
62  do (when (<= n code (1- m))
63  (setf m code)))
64  (incf delta (* (- m n) handled+1))
65  (setf n m)
66  (loop for char across string
67  for code = (char-code char)
68  do (let ((q delta))
69  (when (< code n)
70  (incf delta))
71  (when (= n code)
72  (loop for k from BASE by BASE
73  for tt = (cond ((<= k bias) TMIN)
74  ((<= (+ bias TMAX) k) TMAX)
75  (T (- k bias)))
76  do (when (< q tt) (return))
77  (write-char (encode-digit (+ tt (mod (- q tt) (- BASE tt)))) out)
78  (setf q (truncate (- q tt) (- BASE tt))))
79  (write-char (encode-digit q) out)
80  (setf bias (adapt delta handled+1 (= handled basic)))
81  (setf delta 0)
82  (incf handled))))
83  (incf delta)
84  (incf n)))))
85 
86 (defun decode-digit (char)
87  (let ((code (char-code char)))
88  (cond ((<= #x30 code #x39) (+ 26 (- code #x30)))
89  ((<= #x41 code #x5A) (- code #x41))
90  ((<= #x61 code #x7A) (- code #x61))
91  (T BASE))))
92 
93 (defun decode-punycode (string &optional out)
94  (let* ((i 0)
95  (n INITIAL-N)
96  (bias INITIAL-BIAS)
97  (basic (or (position #\- string :from-end T) 0))
98  (written basic)
99  (uni (make-array (length string) :element-type 'character)))
100  ;; This is gross, I know. But we can't stream things out nicely because
101  ;; later mixed codepoints can have the same target index, causing earlier
102  ;; codepoints to be shifted downwards, which we obviously cannot do if we
103  ;; already emitted the codepoint to stream. So we instead copy to a string
104  ;; first.
105  (loop for i from 0 below basic
106  do (setf (char uni i) (char string i)))
107  (flet ((insert (pos char)
108  (loop for i downfrom written above pos
109  do (setf (char uni i) (char uni (1- i))))
110  (setf (char uni pos) char)))
111  (loop with in = (if (< 0 basic) (1+ basic) 0)
112  for old-i = i
113  while (< in (length string))
114  do (loop with w = 1
115  for k from BASE by BASE
116  for digit = (decode-digit (char string in))
117  do (incf in)
118  (incf i (* digit w))
119  (let ((tt (cond ((<= k bias) TMIN)
120  ((<= (+ bias TMAX) k) TMAX)
121  (T (- k bias)))))
122  (when (< digit tt) (return))
123  (setf w (* w (- base tt)))))
124  (incf written)
125  (setf bias (adapt (- i old-i) written (= old-i 0)))
126  (incf n (truncate i written))
127  (setf i (mod i written))
128  (insert i (code-char n))
129  (incf i)))
130  (with-stream out
131  (write-string uni out :end written))))
132 
133 (defun encode-domain (string &optional out)
134  (with-stream out
135  (loop for start = 0 then (1+ end)
136  for end = (or (position #\. string :start start) (length string))
137  do (cond ((loop for i from start below end
138  thereis (< 127 (char-code (char string i))))
139  (write-string "xn--" out)
140  (encode-punycode (subseq string start end) out))
141  (T
142  (write-string string out :start start :end end)))
143  (if (< end (length string))
144  (write-char #\. out)
145  (return)))))
146 
147 (defun decode-domain (string &optional out)
148  (with-stream out
149  (loop for start = 0 then (1+ end)
150  for end = (or (position #\. string :start start) (length string))
151  do (cond ((and (< (length "xn--") (- end start))
152  (string= "xn--" string :start2 start :end2 (+ start (length "xn--"))))
153  (decode-punycode (subseq string (+ start (length "xn--")) end) out))
154  (T
155  (write-string string out :start start :end end)))
156  (if (< end (length string))
157  (write-char #\. out)
158  (return)))))
159 
160 (eval-when (:load-toplevel)
161  (push :rfc3492 *features*))