From 6a0fa333039adf4340c85aab53eeab35bd8032c4 Mon Sep 17 00:00:00 2001 From: Nathan Froyd Date: Wed, 13 Jan 2010 16:20:48 -0500 Subject: [PATCH 01/76] add new ENCODE-STATE and DECODE-STATE --- octets.lisp | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/octets.lisp b/octets.lisp index cae7171..243cef8 100644 --- a/octets.lisp +++ b/octets.lisp @@ -140,6 +140,23 @@ octet vector with a fill pointer." (funcall encode-fun octets start end table writer) return-value)))) +(defun required-argument () + (error "Required argument not provided")) + +(defstruct (encode-state + (:copier nil) + (:constructor)) + (encoded-length (required-argument) :read-only t) + (octets->string (required-argument) :read-only t) + (octets->octets (required-argument) :read-only t)) + +(defstruct (decode-state + (:copier nil) + (:constructor)) + (decoded-length (required-argument) :read-only t) + (string->octets (required-argument) :read-only t) + (octets->octets (required-argument) :read-only t)) + (defun decode-octets (destination string format &key (start 0) end decoded-length case-fold map01 &allow-other-keys) From 0d7273891209380b4b61ecbcb8eefb9ac2b8a9d5 Mon Sep 17 00:00:00 2001 From: Nathan Froyd Date: Wed, 13 Jan 2010 16:21:06 -0500 Subject: [PATCH 02/76] add ARRAY-DATA-AND-OFFSETS --- octets.lisp | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/octets.lisp b/octets.lisp index 243cef8..ef7dfa5 100644 --- a/octets.lisp +++ b/octets.lisp @@ -157,6 +157,18 @@ octet vector with a fill pointer." (string->octets (required-argument) :read-only t) (octets->octets (required-argument) :read-only t)) +(declaim (inline array-data-and-offsets)) +(defun array-data-and-offsets (v start end) + "Like ARRAY-DISPLACEMENT, only more useful." + #+sbcl + (sb-kernel:with-array-data ((v v) (start start) (end end)) + (values v start end)) + #+cmu + (lisp::with-array-data ((v v) (start start) (end end)) + (values v start end)) + #-(or sbcl cmu) + (values v start (or end (length v)))) + (defun decode-octets (destination string format &key (start 0) end decoded-length case-fold map01 &allow-other-keys) From 72c20d6e6bc279e4b158a607ab82e1c42ac075ce Mon Sep 17 00:00:00 2001 From: Nathan Froyd Date: Wed, 13 Jan 2010 16:42:42 -0500 Subject: [PATCH 03/76] add new types.lisp file --- binascii.asd | 3 ++- octets.lisp | 2 -- types.lisp | 5 +++++ 3 files changed, 7 insertions(+), 3 deletions(-) create mode 100644 types.lisp diff --git a/binascii.asd b/binascii.asd index 10aaf52..5d3b9ec 100644 --- a/binascii.asd +++ b/binascii.asd @@ -12,7 +12,8 @@ :description "A library of ASCII encoding schemes for binary data" :components ((:static-file "LICENSE") (:file "package") - (:file "octets" :depends-on ("package")) + (:file "types" :depends-on ("package")) + (:file "octets" :depends-on ("types")) (:file "ascii85" :depends-on ("octets")) (:file "base85" :depends-on ("octets")) (:file "base64" :depends-on ("octets")) diff --git a/octets.lisp b/octets.lisp index ef7dfa5..18cda7d 100644 --- a/octets.lisp +++ b/octets.lisp @@ -2,8 +2,6 @@ (cl:in-package :binascii) -(deftype index () '(mod #.array-dimension-limit)) - (defgeneric encoding-tools (format) (:documentation "Return three values: the basic encoding function for FORMAT, an encoded-length function for FORMAT, and an encoding table for diff --git a/types.lisp b/types.lisp new file mode 100644 index 0000000..b196ea1 --- /dev/null +++ b/types.lisp @@ -0,0 +1,5 @@ +;;;; types.lisp -- various useful types + +(cl:in-package :binascii) + +(deftype index () '(mod #.array-dimension-limit)) From 925cffb9ba327308a12f20834991c87cb27664e3 Mon Sep 17 00:00:00 2001 From: Nathan Froyd Date: Wed, 13 Jan 2010 16:43:14 -0500 Subject: [PATCH 04/76] move ENCODE-STATE and DECODE-STATE to types.lisp --- octets.lisp | 17 ----------------- types.lisp | 17 +++++++++++++++++ 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/octets.lisp b/octets.lisp index 18cda7d..2098177 100644 --- a/octets.lisp +++ b/octets.lisp @@ -138,23 +138,6 @@ octet vector with a fill pointer." (funcall encode-fun octets start end table writer) return-value)))) -(defun required-argument () - (error "Required argument not provided")) - -(defstruct (encode-state - (:copier nil) - (:constructor)) - (encoded-length (required-argument) :read-only t) - (octets->string (required-argument) :read-only t) - (octets->octets (required-argument) :read-only t)) - -(defstruct (decode-state - (:copier nil) - (:constructor)) - (decoded-length (required-argument) :read-only t) - (string->octets (required-argument) :read-only t) - (octets->octets (required-argument) :read-only t)) - (declaim (inline array-data-and-offsets)) (defun array-data-and-offsets (v start end) "Like ARRAY-DISPLACEMENT, only more useful." diff --git a/types.lisp b/types.lisp index b196ea1..cc305b3 100644 --- a/types.lisp +++ b/types.lisp @@ -3,3 +3,20 @@ (cl:in-package :binascii) (deftype index () '(mod #.array-dimension-limit)) + +(defun required-argument () + (error "Required argument not provided")) + +(defstruct (encode-state + (:copier nil) + (:constructor)) + (encoded-length (required-argument) :read-only t) + (octets->string (required-argument) :read-only t) + (octets->octets (required-argument) :read-only t)) + +(defstruct (decode-state + (:copier nil) + (:constructor)) + (decoded-length (required-argument) :read-only t) + (string->octets (required-argument) :read-only t) + (octets->octets (required-argument) :read-only t)) From 12a7b7cda11be7332eceede34bddad27cc5a7732 Mon Sep 17 00:00:00 2001 From: Nathan Froyd Date: Wed, 13 Jan 2010 16:44:43 -0500 Subject: [PATCH 05/76] add SIMPLE-OCTET-VECTOR type --- types.lisp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/types.lisp b/types.lisp index cc305b3..1f172e2 100644 --- a/types.lisp +++ b/types.lisp @@ -4,6 +4,10 @@ (deftype index () '(mod #.array-dimension-limit)) +(deftype simple-octet-vector (&optional (length '*)) + #+(or sbcl cmu) `(simple-array (unsigned-byte 8) (,length)) + #-(or sbcl cmu) `(array (unsigned-byte 8) (,length))) + (defun required-argument () (error "Required argument not provided")) From b9c8791b5e500c1a45727f28baa00d69dc67a077 Mon Sep 17 00:00:00 2001 From: Nathan Froyd Date: Wed, 13 Jan 2010 22:45:54 -0500 Subject: [PATCH 06/76] clarify when we can easy line-break encoded output --- TODO | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/TODO b/TODO index 6a5404d..8a6945e 100644 --- a/TODO +++ b/TODO @@ -7,8 +7,9 @@ that paradigm up with variable-sized groups. * fix ascii85 encoding Must SHRINK-VECTOR on NIL destination in case we encoded 'z'. * line-breaking encoded output -This should be easy; just pass a slightly different closure into this -main encoding function. +This is probably really only doable for encoding to freshly-consed vectors. +Certainly it's much more doable for clients with the upcoming async +interface. * compiler macros for optimizing constant-format versions of encode/decode The trick will be to do this without adding too much bloat. Will probably only do this for a NIL destination, as that's the case where we From ca1f6adc1d6e44af971bd27be4dddafc83dbf5a5 Mon Sep 17 00:00:00 2001 From: Nathan Froyd Date: Wed, 13 Jan 2010 22:46:49 -0500 Subject: [PATCH 07/76] add TODOs for converting to async encode/decode --- TODO | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/TODO b/TODO index 8a6945e..58b6b48 100644 --- a/TODO +++ b/TODO @@ -32,3 +32,16 @@ Especially in the ascii85 case, over-allocation is just a fact of life. We are definitely going to over-allocate if we permit whitespace. * other formats Python's binascii module has a few we might consider adding. +* convert to async API +** encode +*** TODO base16 +*** TODO base32 +*** TODO base64 +*** TODO base85 +*** TODO ascii85 +** decode +*** TODO base16 +*** TODO base32 +*** TODO base64 +*** TODO base85 +*** TODO ascii85 From c856e25abc21fa93de4ee1176e1cc8dbf4461584 Mon Sep 17 00:00:00 2001 From: Nathan Froyd Date: Wed, 13 Jan 2010 22:47:06 -0500 Subject: [PATCH 08/76] add sneaky SIMPLE-STRING deftype --- package.lisp | 1 + types.lisp | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/package.lisp b/package.lisp index fc2b9ea..e3ff008 100644 --- a/package.lisp +++ b/package.lisp @@ -2,4 +2,5 @@ (cl:defpackage :binascii (:use :cl) + (:shadow simple-string) (:export #:encode-octets #:decode-octets)) diff --git a/types.lisp b/types.lisp index 1f172e2..203d0b5 100644 --- a/types.lisp +++ b/types.lisp @@ -8,6 +8,11 @@ #+(or sbcl cmu) `(simple-array (unsigned-byte 8) (,length)) #-(or sbcl cmu) `(array (unsigned-byte 8) (,length))) +(deftype simple-string () + #+sbcl '(and cl:simple-string (not (simple-array nil (*)))) + #+cmu cl:simple-string + #-(or sbcl cmu) cl:string) + (defun required-argument () (error "Required argument not provided")) From 6ed13a9b8641c161e7d97771ab933a39cea52e3d Mon Sep 17 00:00:00 2001 From: Nathan Froyd Date: Wed, 13 Jan 2010 22:49:05 -0500 Subject: [PATCH 09/76] remember that there's other Lisps besides CMUCL and SBCL --- TODO | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/TODO b/TODO index 58b6b48..31ba129 100644 --- a/TODO +++ b/TODO @@ -28,6 +28,13 @@ Trying to do minimal allocation; might have to just suck it up and admit that we are going to over-allocate fairly often. Especially in the ascii85 case, over-allocation is just a fact of life. +* figure out how to make the library fast for non-CMUCL/SBCL +We might just have to accept a bit of code bloat everyplace else, by +duplicating logic/functions for simple vs. non-simple. +I don't want to restrict clients to always using simple arrays everywhere. + +CCL looks like it ought to be able to get around this, but I can't find +the magic call. * ignoring whitespace when decoding We are definitely going to over-allocate if we permit whitespace. * other formats From 06957a2a616a8604e0e4f5d765c819fc156bcadd Mon Sep 17 00:00:00 2001 From: Nathan Froyd Date: Wed, 13 Jan 2010 22:49:41 -0500 Subject: [PATCH 10/76] add commented-out async encoding interface --- octets.lisp | 63 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) diff --git a/octets.lisp b/octets.lisp index 2098177..c352d8d 100644 --- a/octets.lisp +++ b/octets.lisp @@ -150,6 +150,69 @@ octet vector with a fill pointer." #-(or sbcl cmu) (values v start (or end (length v)))) +#|| +(defun encode-to-fresh-vector (octets format start end element-type) + (multiple-value-bind (input start end) + (array-data-and-offsets octets start end) + (let* ((state (find-format-state format)) + (length (funcall (encode-state-encoded-length state) (- end start)))) + (flet ((frob (etype encode-fun) + (let ((v (make-array length :element-type etype))) + (funcall encode-fun state v octets + 0 length start end t) + v))) + (declare (inline frob)) + (ecase (canonical-element-type element-type) + (character + (frob 'character (encode-state-octets->string state))) + (base-char + (frob 'base-char (encode-state-octets->string state))) + (ub8 + (frob '(unsigned-byte 8) (encode-state-octets->octets state)))))))) + +(defun encode (octets format &key (start 0) end (element-type 'base-char)) + (encode-to-fresh-vector octets format start end element-type)) + +(defun encode-octets (destination octets format &key (start 0) end + (output-start 0) output-end (element-type 'base-char) + finishp) + "Encode OCTETS between START and END into ASCII characters +according to FORMAT and write them to DESTINATION according to ELEMENT-TYPE. + +If DESTINATION is NIL and ELEMENT-TYPE is a subtype of CHARACTER, then a +string is returned. If DESTINATION is NIL and ELEMENT-TYPE is +\(UNSIGNED-BYTE 8) or an equivalent type, then an octet vector is returned. + +If ELEMENT-TYPE is a subtype of CHARACTER, then DESTINATION may also be +a string. Similarly, if ELEMENT-TYPE is (UNSIGNED-BYTE 8) or an +equivalent type, then DESTINATION may be an octet vector. In this case, +OUTPUT-START and OUTPUT-END are used to determine the portion of +DESTINATION where the encoded output may be placed. The number of +octets encoded and the number of characters or bytes, respectively, +written are returned as multiple values. ELEMENT-TYPE is ignored. + +If FINISHP is true, then in addition to any encoding of OCTETS, also output +any necessary padding required by FORMAT." + (let ((format-state (find-format format))) + (flet ((frob (encode-fun) + (multiple-value-bind (input input-start input-end) + (array-data-and-offsets octets start end) + (multiple-value-bind (output output-start output-end) + (array-data-and-offsets destination output-start output-end) + (funcall encode-fun format-state + output octets + output-start output-end + input-start input-end nil))))) + (declare (inline frob)) + (etypecase destination + (null + (encode-to-fresh-vector octets format-state start end element-type)) + (string + (frob (encode-state-octets->string format-state))) + ((array (unsigned-byte 8) (*)) + (frob (encode-state-octets->octets format-state))))))) +||# + (defun decode-octets (destination string format &key (start 0) end decoded-length case-fold map01 &allow-other-keys) From fa386fba9be93d0cc41352fc61f1eefa1abbea94 Mon Sep 17 00:00:00 2001 From: Nathan Froyd Date: Wed, 13 Jan 2010 22:49:54 -0500 Subject: [PATCH 11/76] first crack at async encoding interface for base64 --- base64.lisp | 141 ++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 138 insertions(+), 3 deletions(-) diff --git a/base64.lisp b/base64.lisp index 1ac0f4a..9dc9f7c 100644 --- a/base64.lisp +++ b/base64.lisp @@ -8,6 +8,141 @@ (defvar *base64url-encode-table* #.(coerce "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_" 'simple-base-string)) +(defstruct (base64-encode-state + (:include encode-state) + (:copier nil) + (:constructor make-base64-encode-state + (table + &aux (encoded-length #'encoded-length/base64) + (octets->octets #'octets->octets/base64) + (octets->string #'octets->string/base64)))) + (bits 0 :type (unsigned-byte 16)) + (n-bits 0 :type fixnum) + (table *base64-encode-table* :read-only t :type (simple-array base-char (64))) + (adding-padding-p nil) + (padding-remaining 0 :type (integer 0 3))) + +(declaim (inline base64-encoder)) +(defun base64-encoder (state output input + output-start output-end + input-start input-end lastp converter) + (declare (type base64-encode-state state)) + (declare (type simple-octet-vector input)) + (declare (type index output-start output-end input-start input-end)) + (declare (type function converter)) + (declare (optimize speed)) + (let* ((input-index input-start) + (output-index output-start) + (bits (base64-encode-state-bits state)) + (n-bits (base64-encode-state-n-bits state)) + (table (base64-encode-state-table state))) + (declare (type index input-index output-index)) + (declare (type (unsigned-byte 16) bits)) + (declare (type fixnum n-bits)) + (tagbody + PAD-CHECK + (when (base64-encode-state-adding-padding-p state) + (go PAD)) + INPUT-CHECK + (when (>= input-index input-end) + (go DONE)) + DO-INPUT + (when (< n-bits 6) + (setf bits (ldb (byte 16 0) + (logior (ash bits 8) (aref input input-index)))) + (incf input-index) + (incf n-bits 8)) + OUTPUT-CHECK + (when (>= output-index output-end) + (go DONE)) + DO-OUTPUT + (decf n-bits 6) + (setf (aref output output-index) + (funcall converter (aref table (ldb (byte 6 n-bits) bits)))) + (incf output-index) + (if (>= n-bits 6) + (go OUTPUT-CHECK) + (go INPUT-CHECK)) + DONE + (unless lastp + (go RESTORE-STATE)) + (setf (base64-encode-state-adding-padding-p state) t) + (cond + ((= n-bits 2) + (setf (base64-encode-state-padding-remaining state) 3)) + ((= n-bits 4) + (setf (base64-encode-state-padding-remaining state) 2))) + PAD + (cond + ((or (zerop n-bits) + (zerop (base64-encode-state-padding-remaining state))) + (go RESTORE-STATE)) + ((= n-bits 2) + (go DO-PAD-FOR-2-BITS)) + ((= n-bits 4) + (go DO-PAD-FOR-4-BITS))) + DO-PAD-FOR-2-BITS + (let ((padding-remaining (base64-encode-state-padding-remaining state))) + (declare (type (integer 0 3) padding-remaining)) + (when (and (>= padding-remaining 3) + (< output-index output-end)) + (setf (aref output output-index) + (funcall converter + (aref table (ash (ldb (byte 2 0) bits) 4)))) + (incf output-index) + (decf padding-remaining)) + (when (and (>= padding-remaining 2) + (< output-index output-end)) + (setf (aref output output-index) (funcall converter #\=)) + (incf output-index) + (decf padding-remaining)) + (when (and (>= padding-remaining 1) + (< output-index output-end)) + (setf (aref output output-index) (funcall converter #\=)) + (incf output-index) + (decf padding-remaining)) + (when (zerop padding-remaining) + (setf n-bits 0)) + (setf (base64-encode-state-padding-remaining state) padding-remaining) + (go RESTORE-STATE)) + DO-PAD-FOR-4-BITS + (let ((padding-remaining (base64-encode-state-padding-remaining state))) + (declare (type (integer 0 3) padding-remaining)) + (when (and (>= padding-remaining 2) + (< output-index output-end)) + (setf (aref output output-index) + (funcall converter + (aref table (ash (ldb (byte 4 0) bits) 2)))) + (incf output-index) + (decf padding-remaining)) + (when (and (>= padding-remaining 1) + (< output-index output-end)) + (setf (aref output output-index) (funcall converter #\=)) + (incf output-index) + (decf padding-remaining)) + (when (zerop padding-remaining) + (setf n-bits 0)) + (setf (base64-encode-state-padding-remaining state) padding-remaining) + (go RESTORE-STATE)) + RESTORE-STATE + (setf (base64-encode-state-bits state) bits + (base64-encode-state-n-bits state) n-bits)) + (values (- input-index input-start) (- output-index output-start)))) + +(defun octets->octets/base64 (state output input + output-start output-end + input-start input-end lastp) + (declare (type simple-octet-vector output)) + (base64-encoder state output input output-start output-end + input-start input-end lastp #'char-code)) + +(defun octets->string/base64 (state output input + output-start output-end + input-start input-end lastp) + (declare (type simple-string output)) + (base64-encoder state output input output-start output-end + input-start input-end lastp #'identity)) + (defun encode-octets-base64 (octets start end table writer) (declare (type (simple-array (unsigned-byte 8) (*)) octets)) (declare (type index start end)) @@ -30,16 +165,16 @@ (funcall writer (aref table (ash (ldb (byte 4 0) bits) 2))) (funcall writer #\=))))) -(defun encoded-length-base64 (count) +(defun encoded-length/base64 (count) "Return the number of characters required to encode COUNT octets in Base64." (* (ceiling count 3) 4)) (defmethod encoding-tools ((format (eql :base64))) - (values #'encode-octets-base64 #'encoded-length-base64 + (values #'encode-octets-base64 #'encoded-length/base64 *base64-encode-table*)) (defmethod encoding-tools ((format (eql :base64url))) - (values #'encode-octets-base64 #'encoded-length-base64 + (values #'encode-octets-base64 #'encoded-length/base64 *base64url-encode-table*)) (defvar *base64-decode-table* From 8e8286927f910ba0e772e3db1866c230f33bda91 Mon Sep 17 00:00:00 2001 From: Nathan Froyd Date: Thu, 14 Jan 2010 22:01:04 -0500 Subject: [PATCH 12/76] move the OPTIMIZE SPEED declaration to its proper place --- base64.lisp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/base64.lisp b/base64.lisp index 9dc9f7c..016a517 100644 --- a/base64.lisp +++ b/base64.lisp @@ -30,7 +30,6 @@ (declare (type simple-octet-vector input)) (declare (type index output-start output-end input-start input-end)) (declare (type function converter)) - (declare (optimize speed)) (let* ((input-index input-start) (output-index output-start) (bits (base64-encode-state-bits state)) @@ -133,6 +132,7 @@ output-start output-end input-start input-end lastp) (declare (type simple-octet-vector output)) + (declare (optimize speed)) (base64-encoder state output input output-start output-end input-start input-end lastp #'char-code)) @@ -140,6 +140,7 @@ output-start output-end input-start input-end lastp) (declare (type simple-string output)) + (declare (optimize speed)) (base64-encoder state output input output-start output-end input-start input-end lastp #'identity)) From df93b6b30d4585d7f6a198af95b06da2b9749f9b Mon Sep 17 00:00:00 2001 From: Nathan Froyd Date: Thu, 14 Jan 2010 22:01:56 -0500 Subject: [PATCH 13/76] convert base32 encoder to new async interface --- base32.lisp | 115 ++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 112 insertions(+), 3 deletions(-) diff --git a/base32.lisp b/base32.lisp index 388979e..9933e96 100644 --- a/base32.lisp +++ b/base32.lisp @@ -7,6 +7,115 @@ (defvar *base32hex-encode-table* #.(coerce "0123456789ABCDEFGHIJKLMNOPQRSTUV" 'simple-base-string)) +(defstruct (base32-encode-state + (:include encode-state) + (:copier nil) + (:constructor make-base32-encode-state + (table + &aux (encoded-length #'encoded-length/base32) + (octets->octets #'octets->octets/base32) + (octets->string #'octets->string/base32)))) + (bits 0 :type (unsigned-byte 16)) + (n-bits 0 :type fixnum) + (table *base32-encode-table* :read-only t + :type (simple-array base-char (32))) + (adding-padding-p nil) + (padding-remaining 0 :type (integer 0 6))) + +(declaim (inline base32-encoder)) +(defun base32-encoder (state output input + output-start output-end + input-start input-end lastp converter) + (declare (type base32-encode-state state)) + (declare (type simple-octet-vector input)) + (declare (type index output-start output-end input-start input-end)) + (declare (type function converter)) + (let* ((input-index input-start) + (output-index output-start) + (bits (base32-encode-state-bits state)) + (n-bits (base32-encode-state-n-bits state)) + (table (base32-encode-state-table state)) + (n-pad-chars #.(make-array 5 :initial-contents '(0 4 1 6 3) + :element-type 'fixnum))) + + (declare (type index input-index output-index)) + (declare (type (unsigned-byte 16) bits)) + (declare (type fixnum n-bits)) + (declare (type (simple-array fixnum (5)) n-pad-chars)) + (tagbody + PAD-CHECK + (when (base32-encode-state-adding-padding-p state) + (go PAD)) + INPUT-CHECK + (when (>= input-index input-end) + (go DONE)) + DO-INPUT + (when (< n-bits 5) + (setf bits (ldb (byte 16 0) + (logior (ash bits 8) (aref input input-index)))) + (incf input-index) + (incf n-bits 8)) + OUTPUT-CHECK + (when (>= output-index output-end) + (go DONE)) + DO-OUTPUT + (decf n-bits 5) + (setf (aref output output-index) + (funcall converter (aref table (ldb (byte 5 n-bits) bits)))) + (incf output-index) + (if (>= n-bits 5) + (go OUTPUT-CHECK) + (go INPUT-CHECK)) + DONE + (unless lastp + (go RESTORE-STATE)) + (setf (base32-encode-state-adding-padding-p state) t) + (setf (base32-encode-state-padding-remaining state) + (aref n-pad-chars n-bits)) + PAD + (locally (declare (type (integer 0 4) n-bits)) + (let ((padding-remaining (base32-encode-state-padding-remaining state)) + (max-pad-chars (aref n-pad-chars n-bits))) + (declare (type (integer 0 6) padding-remaining)) + (declare (type (integer 0 6) max-pad-chars)) + (when (and (= padding-remaining max-pad-chars) + (< output-index output-end)) + (setf (aref output output-index) + (funcall converter + (aref table (ash (ldb (byte n-bits 0) bits) + (- 5 n-bits))))) + (incf output-index) + (decf padding-remaining)) + (when (< padding-remaining max-pad-chars) + (loop while (and (>= padding-remaining 0) + (< output-index output-end)) + do (setf (aref output output-index) (funcall converter #\=)) + (incf output-index) + (decf padding-remaining))) + (when (zerop padding-remaining) + (setf n-bits 0)) + (setf (base32-encode-state-padding-remaining state) padding-remaining))) + RESTORE-STATE + (setf (base32-encode-state-bits state) bits + (base32-encode-state-n-bits state) n-bits)) + (values (- input-index input-start) (- output-index output-start)))) + +(defun octets->octets/base32 (state output input + output-start output-end + input-start input-end lastp) + (declare (type simple-octet-vector output)) + (declare (optimize speed)) + (base32-encoder state output input output-start output-end + input-start input-end lastp #'char-code)) + +(defun octets->string/base32 (state output input + output-start output-end + input-start input-end lastp) + (declare (type simple-string output)) + (declare (optimize speed)) + (base32-encoder state output input output-start output-end + input-start input-end lastp #'identity)) + (defun encode-octets-base32 (octets start end table writer) (declare (type (simple-array (unsigned-byte 8) (*)) octets)) (declare (type index start end)) @@ -41,16 +150,16 @@ (dotimes (i n-pad) (funcall writer #\=))))) -(defun encoded-length-base32 (count) +(defun encoded-length/base32 (count) "Return the number of characters required to encode COUNT octets in Base32." (* (ceiling count 5) 8)) (defmethod encoding-tools ((format (eql :base32))) - (values #'encode-octets-base32 #'encoded-length-base32 + (values #'encode-octets-base32 #'encoded-length/base32 *base32-encode-table*)) (defmethod encoding-tools ((format (eql :base32hex))) - (values #'encode-octets-base32 #'encoded-length-base32 + (values #'encode-octets-base32 #'encoded-length/base32 *base32hex-encode-table*)) (defvar *base32-decode-table* (make-decode-table *base32-encode-table*)) From 6479bd6c1cbf80b916210bf22bc80e3d11cc6cad Mon Sep 17 00:00:00 2001 From: Nathan Froyd Date: Sat, 16 Jan 2010 16:52:29 -0500 Subject: [PATCH 14/76] rename ENCODED-LENGTH-BASE16 --- base16.lisp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/base16.lisp b/base16.lisp index 0502202..a89172a 100644 --- a/base16.lisp +++ b/base16.lisp @@ -11,7 +11,7 @@ (make-decode-table *base16-encode-table*)) (declaim (type decode-table *base16-decode-table*)) -(defun encoded-length-base16 (count) +(defun encoded-length/base16 (count) "Return the number of characters required to encode COUNT octets in Base16." (* count 2)) @@ -26,11 +26,11 @@ (funcall writer (aref table (ldb (byte 4 0) byte)))))) (defmethod encoding-tools ((format (eql :base16))) - (values #'encode-octets-base16 #'encoded-length-base16 + (values #'encode-octets-base16 #'encoded-length/base16 *base16-encode-table*)) (defmethod encoding-tools ((format (eql :hex))) - (values #'encode-octets-base16 #'encoded-length-base16 + (values #'encode-octets-base16 #'encoded-length/base16 *hex-encode-table*)) (defun decode-octets-base16 (string start end length table writer) From 271a088a6cdd37ff2bb4f18b157b54522d46d384 Mon Sep 17 00:00:00 2001 From: Nathan Froyd Date: Sat, 16 Jan 2010 16:52:43 -0500 Subject: [PATCH 15/76] add BASE16-ENCODE-STATE --- base16.lisp | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/base16.lisp b/base16.lisp index a89172a..486bfcd 100644 --- a/base16.lisp +++ b/base16.lisp @@ -11,6 +11,20 @@ (make-decode-table *base16-encode-table*)) (declaim (type decode-table *base16-decode-table*)) +(defstruct (base16-encode-state + (:include encode-state) + (:copier nil) + (:constructor make-base16-encode-state + (table + &aux (encoded-length #'encoded-length/base16) + (octets->octets #'octets->octets/base16) + (octets->string #'octets->string/base16)))) + (bits 0 :type (unsigned-byte 8)) + (n-bits 0 :type fixnum) + (table *base16-encode-table* :read-only t + :type (simple-array base-char (16))) + (finished-input-p nil)) + (defun encoded-length/base16 (count) "Return the number of characters required to encode COUNT octets in Base16." (* count 2)) From a3376d3b51026179fda1a2d31b7c346496051994 Mon Sep 17 00:00:00 2001 From: Nathan Froyd Date: Sat, 16 Jan 2010 17:02:06 -0500 Subject: [PATCH 16/76] add async encode functions for base16 --- base16.lisp | 75 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) diff --git a/base16.lisp b/base16.lisp index 486bfcd..408a770 100644 --- a/base16.lisp +++ b/base16.lisp @@ -25,10 +25,85 @@ :type (simple-array base-char (16))) (finished-input-p nil)) +(declaim (inline base16-encoder)) +(defun base16-encoder (state output input + output-start output-end + input-start input-end lastp converter) + (declare (type base16-encode-state state)) + (declare (type simple-octet-vector input)) + (declare (type index output-start output-end input-start input-end)) + (declare (type function converter)) + (let ((input-index input-start) + (output-index output-start) + (bits (base16-encode-state-bits state)) + (n-bits (base16-encode-state-n-bits state)) + (table (base16-encode-state-table state))) + (declare (type index input-index output-index)) + (declare (type (unsigned-byte 8) bits)) + (declare (type fixnum n-bits)) + (tagbody + PAD-CHECK + (when (base16-encode-state-finished-input-p state) + (go FLUSH-BITS)) + INPUT-CHECK + (when (>= index-index input-end) + (go DONE)) + DO-INPUT + (when (zerop n-bits) + (setf bits (aref input input-index)) + (incf input-index) + (setf n-bits 8)) + OUTPUT-CHECK + (when (>= output-index output-end) + (go DONE)) + DO-OUTPUT + (decf n-bits 4) + (setf (aref output output-index) + (funcall converter (aref table (ldb (byte 4 n-bits) bits)))) + (incf output-index) + (if (>= n-bits 4) + (go OUTPUT-CHECK) + (go INPUT-CHECK)) + DONE + (unless lastp + (go RESTORE-STATE)) + (setf (base16-encode-state-finished-input-p state) t) + FLUSH-BITS + (when (zerop n-bits) + (go RESTORE-STATE)) + FLUSH-OUTPUT-CHECK + (when (>= output-index output-end) + (go RESTORE-STATE)) + DO-FLUSH-OUTPUT + (decf n-bits 4) + (setf (aref output output-index) + (funcall converter (aref table (ldb (byte 4 n-bits) bits)))) + (incf output-index) + (when (= n-bits 4) + (go FLUSH-OUTPUT-CHECK)) + RESTORE-STATE + (setf (base16-encode-state-bits state) bits + (base16-encode-state-n-bits state) n-bits)) + (values (- input-index input-start) (- output-index output-start)))) + (defun encoded-length/base16 (count) "Return the number of characters required to encode COUNT octets in Base16." (* count 2)) +(defun octets->octets/base16 (state output input + output-start output-end + input-start input-end lastp) + (declare (type simple-octet-vector output)) + (base16-encoder state output input output-start output-end + input-start input-end lastp #'char-code)) + +(defun octets->string/base16 (state output input + output-start output-end + input-start input-end lastp) + (declare (type simple-string output)) + (base16-encoder state output input output-start output-end + input-start input-end lastp #'identity)) + (defun encode-octets-base16 (octets start end table writer) (declare (type (simple-array (unsigned-byte 8) (*)) octets)) (declare (type index start end)) From 8b73bff4f67e998591e44f301a0c46462b5c475b Mon Sep 17 00:00:00 2001 From: Nathan Froyd Date: Sat, 16 Jan 2010 17:07:48 -0500 Subject: [PATCH 17/76] rename ENCODED-LENGTH-BASE85 --- base85.lisp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/base85.lisp b/base85.lisp index 90e4766..25af659 100644 --- a/base85.lisp +++ b/base85.lisp @@ -5,7 +5,7 @@ (defvar *base85-encode-table* #.(coerce "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz!#$%&()*+-;<=>?@^_`{|}~" 'simple-base-string)) -(defun encoded-length-base85 (count) +(defun encoded-length/base85 (count) "Return the number of characters required to encode COUNT octets in Base85." (* (ceiling count 4) 5)) @@ -31,7 +31,7 @@ (funcall writer (aref buffer i))))))) (defmethod encoding-tools ((format (eql :base85))) - (values #'encode-octets-base85 #'encoded-length-base85 + (values #'encode-octets-base85 #'encoded-length/base85 *base85-encode-table*)) (defvar *base85-decode-table* (make-decode-table *base85-encode-table*)) From cdd82e65fa5b2410153097db85abe571101bdaa0 Mon Sep 17 00:00:00 2001 From: Nathan Froyd Date: Sat, 16 Jan 2010 17:14:46 -0500 Subject: [PATCH 18/76] add BASE85-ENCODE-STATE --- base85.lisp | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/base85.lisp b/base85.lisp index 25af659..b5d547f 100644 --- a/base85.lisp +++ b/base85.lisp @@ -5,6 +5,28 @@ (defvar *base85-encode-table* #.(coerce "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz!#$%&()*+-;<=>?@^_`{|}~" 'simple-base-string)) +(defstruct (base85-encode-state + (:include encode-state) + (:copier nil) + (:constructor make-base85-encode-state + (table + &aux (encoded-length #'encoded-length/base85) + (octets->octets #'octets->octets/base85) + (octets->string #'octets->string/base85)))) + ;; TODO: Clever hack for little-endian machines: fill in GROUP + ;; back-to-front, using PENDING to count down, then use SBCL's + ;; %VECTOR-RAW-BITS or similar to read out the group in proper + ;; big-endian order. We could even do the same thing on x86-64 if we + ;; made the buffer bigger. + ;; + ;; For now, though, we'll fill GROUP front-to-back and PENDING will + ;; indicate how many octets we've filled in. + (group (make-array 4 :element-type '(unsigned-byte 8)) + :read-only t :type (simple-array (unsigned-byte 8) (4))) + (pending 0 :type (integer 0 4)) + (table *base85-encode-table* :read-only t :type (simple-array base-char (85))) + (finished-input-p nil)) + (defun encoded-length/base85 (count) "Return the number of characters required to encode COUNT octets in Base85." (* (ceiling count 4) 5)) From a446e1fe6b2edba1576eb6f92e13b695815a680c Mon Sep 17 00:00:00 2001 From: Nathan Froyd Date: Sun, 17 Jan 2010 08:37:59 -0500 Subject: [PATCH 19/76] add async encode functions for base85 --- base85.lisp | 116 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 116 insertions(+) diff --git a/base85.lisp b/base85.lisp index b5d547f..a686d6d 100644 --- a/base85.lisp +++ b/base85.lisp @@ -21,9 +21,14 @@ ;; ;; For now, though, we'll fill GROUP front-to-back and PENDING will ;; indicate how many octets we've filled in. + #+nil (group (make-array 4 :element-type '(unsigned-byte 8)) :read-only t :type (simple-array (unsigned-byte 8) (4))) + (bits 0 :type (unsigned-byte 32)) (pending 0 :type (integer 0 4)) + (output-group (make-array 5 :element-type 'base-char) + :read-only t :type (simple-array base-char (5))) + (output-pending 0 :type (integer 0 5)) (table *base85-encode-table* :read-only t :type (simple-array base-char (85))) (finished-input-p nil)) @@ -31,6 +36,117 @@ "Return the number of characters required to encode COUNT octets in Base85." (* (ceiling count 4) 5)) +(declaim (inline base85-encode)) +(defun base85-encoder (state output input + output-start ouput-end + input-start input-end lastp converter) + (declare (type base85-encode-state state)) + (declare (type simple-octet-vector input)) + (declare (type index output-start output-end input-start input-end)) + (declare (type function converter)) + (let ((input-index input-start) + (output-index output-start) + (bits (base85-encode-state-bits state)) + (pending (base85-encode-state-pending state)) + (output-group (base85-encode-state-output-group state)) + (output-pending (base85-encode-state-output-pending state)) + (table (base85-encode-state-table state))) + (declare (type index input-index output-index)) + (declare (type (unsigned-byte 32) bits)) + (declare (type (integer 0 4) pending)) + (declare (type (integer 0 5) output-pending)) + (flet ((expand-for-output (bits output-group) + (loop for i from 4 downto 0 + do (multiple-value-bind (b index) (truncate bits 85) + (setf bits b + (aref output-group i) (aref table index))) + finally (setf output-pending 5)))) + (declare (inline expand-for-output)) + (tagbody + PAD-CHECK + (when (base85-encoder-state-finished-input-p state) + (go FLUSH-BITS)) + INPUT-CHECK + (when (>= input-index input-end) + (go DONE)) + DO-INPUT + (when (< pending 4) + (setf bits (ldb (byte 32 0) + (logior (ash (aref input input-index) + (- 24 (* pending 8))) + bits))) + (incf input-index) + (incf pending) + (go INPUT-CHECK)) + EXPAND-FOR-OUTPUT + (expand-for-output bits output-group) + OUTPUT-CHECK + (when (>= output-index output-end) + (go DONE)) + DO-OUTPUT + (when (> output-pending 0) + (setf (aref output output-index) + (funcall converter + (aref output-group (decf output-pending)))) + (incf output-index) + (cond + ((zerop output-pending) + (setf bits 0) + (setf pending 0) + (go INPUT-CHECK)) + (t + (go OUTPUT-CHECK)))) + DONE + (unless lastp + (go RESTORE-STATE)) + (setf (base85-encode-state-finished-input-p state) t) + ;; Make it appear as though the input were padded with zeros to a + ;; full input group. + (let ((for-pad (- 4 pending))) + (setf bits (ldb (byte 32 0) (ash bits (* 8 for-pad)))) + (setf pending 4) + (expand-for-output bits output-group)) + FLUSH-BITS + (when (zerop output-pending) + (go RESTORE-STATE)) + FLUSH-OUTPUT-CHECK + (when (>= output-index output-end) + (go RESTORE-STATE)) + DO-FLUSH-OUTPUT + (when (> output-pending 0) + (setf (aref output output-index) + (funcall converter + (aref output-group (decf output-pending)))) + (incf output-index) + (cond + ((zerop output-pending) + (setf bits 0) + (setf pending 0) + (go RESTORE-STATE)) + (t + (go OUTPUT-CHECK)))) + RESTORE-STATE + (setf (base85-encode-state-bits state) bits + (base85-encode-state-pending state) pending + (base85-encode-state-output-pending state) output-pending)) + (values (- input-index input-start) (- output-index output-start))))) + +(defun octets->octets/base85 (state output input + output-start output-end + input-start input-end lastp) + (declare (type simple-octet-vector output)) + (declare (optimize speed)) + (base85-encoder state output input output-start output-end + input-start input-end lastp #'char-code)) + +(defun octets->string/base85 (state output input + output-start output-end + input-start input-end lastp) + (declare (type simple-string output)) + (declare (optimize speed)) + (base85-encoder state output input output-start output-end + input-start input-end lastp #'identity)) + (defun encode-octets-base85 (octets start end table writer) (declare (type (simple-array (unsigned-byte 8) (*)) octets)) (declare (type index start end)) From 526b574be37b6f89321f34a9e95499fc065ab0a4 Mon Sep 17 00:00:00 2001 From: Nathan Froyd Date: Fri, 22 Jan 2010 14:00:35 -0500 Subject: [PATCH 20/76] move encoding/decoding functions into a FORMAT-DESCRIPTOR structure --- types.lisp | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/types.lisp b/types.lisp index 203d0b5..326fe3c 100644 --- a/types.lisp +++ b/types.lisp @@ -16,16 +16,26 @@ (defun required-argument () (error "Required argument not provided")) +(defstruct (format-descriptor + (:copier nil) + (:constructor make-format-descriptor + (encoded-length octets->string octets->octets + decoded-length + string->octets + octets->string))) + (encoded-length (required-argument) :type function :read-only t) + (octets->string (required-argument) :type function :read-only t) + (octets->octets (required-argument) :type function :read-only t) + (decoded-length (required-argument) :type function :read-only t) + (string->octets (required-argument) :type function :read-only t) + (octets->octets (required-argument) :type function :read-only t)) + (defstruct (encode-state (:copier nil) (:constructor)) - (encoded-length (required-argument) :read-only t) - (octets->string (required-argument) :read-only t) - (octets->octets (required-argument) :read-only t)) + ) (defstruct (decode-state (:copier nil) (:constructor)) - (decoded-length (required-argument) :read-only t) - (string->octets (required-argument) :read-only t) - (octets->octets (required-argument) :read-only t)) + ) From a4e47c605d6d235afe5e5f9fdc852dfadad4bacb Mon Sep 17 00:00:00 2001 From: Nathan Froyd Date: Fri, 22 Jan 2010 14:01:20 -0500 Subject: [PATCH 21/76] add LINE-BREAK and FINISHED-INPUT-P fields to ENCODE-STATE --- base85.lisp | 3 +-- types.lisp | 8 +++++++- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/base85.lisp b/base85.lisp index a686d6d..5261913 100644 --- a/base85.lisp +++ b/base85.lisp @@ -29,8 +29,7 @@ (output-group (make-array 5 :element-type 'base-char) :read-only t :type (simple-array base-char (5))) (output-pending 0 :type (integer 0 5)) - (table *base85-encode-table* :read-only t :type (simple-array base-char (85))) - (finished-input-p nil)) + (table *base85-encode-table* :read-only t :type (simple-array base-char (85)))) (defun encoded-length/base85 (count) "Return the number of characters required to encode COUNT octets in Base85." diff --git a/types.lisp b/types.lisp index 326fe3c..f2d1733 100644 --- a/types.lisp +++ b/types.lisp @@ -33,7 +33,13 @@ (defstruct (encode-state (:copier nil) (:constructor)) - ) + ;; LINE-BREAK describes after how many characters we should be + ;; inserting newlines into the encoded output. It is zero if we + ;; should never insert newlines. + (line-break 0 :type (integer 0 *)) + ;; FINISHED-INPUT-P is either T or NIL depending on whether we have + ;; seen all of the input to be encoded. + (finished-input-p nil)) (defstruct (decode-state (:copier nil) From 7e8175445ba4a69cc506807e13944930e5316c9b Mon Sep 17 00:00:00 2001 From: Nathan Froyd Date: Fri, 22 Jan 2010 14:03:11 -0500 Subject: [PATCH 22/76] FINISHED-INPUT-P supersedes ADDING-PADDING-P fields --- base16.lisp | 3 +-- base64.lisp | 4 ++-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/base16.lisp b/base16.lisp index 408a770..96d2cba 100644 --- a/base16.lisp +++ b/base16.lisp @@ -22,8 +22,7 @@ (bits 0 :type (unsigned-byte 8)) (n-bits 0 :type fixnum) (table *base16-encode-table* :read-only t - :type (simple-array base-char (16))) - (finished-input-p nil)) + :type (simple-array base-char (16)))) (declaim (inline base16-encoder)) (defun base16-encoder (state output input diff --git a/base64.lisp b/base64.lisp index 016a517..900a46b 100644 --- a/base64.lisp +++ b/base64.lisp @@ -40,7 +40,7 @@ (declare (type fixnum n-bits)) (tagbody PAD-CHECK - (when (base64-encode-state-adding-padding-p state) + (when (base64-encode-state-finished-input-p state) (go PAD)) INPUT-CHECK (when (>= input-index input-end) @@ -65,7 +65,7 @@ DONE (unless lastp (go RESTORE-STATE)) - (setf (base64-encode-state-adding-padding-p state) t) + (setf (base64-encode-state-finished-input-p state) t) (cond ((= n-bits 2) (setf (base64-encode-state-padding-remaining state) 3)) From 352a72c9454c8974e14a54a7b3e66824d9edf712 Mon Sep 17 00:00:00 2001 From: Nathan Froyd Date: Fri, 22 Jan 2010 14:03:46 -0500 Subject: [PATCH 23/76] one more ADDING-PADDING-P elimination --- base64.lisp | 1 - 1 file changed, 1 deletion(-) diff --git a/base64.lisp b/base64.lisp index 900a46b..3a6d1f7 100644 --- a/base64.lisp +++ b/base64.lisp @@ -19,7 +19,6 @@ (bits 0 :type (unsigned-byte 16)) (n-bits 0 :type fixnum) (table *base64-encode-table* :read-only t :type (simple-array base-char (64))) - (adding-padding-p nil) (padding-remaining 0 :type (integer 0 3))) (declaim (inline base64-encoder)) From 73e2bc5683f962a4543cfefce1d952a9ea3fec58 Mon Sep 17 00:00:00 2001 From: Nathan Froyd Date: Fri, 22 Jan 2010 14:04:32 -0500 Subject: [PATCH 24/76] move base64 functions into new FORMAT-DESCRIPTOR structure --- base64.lisp | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/base64.lisp b/base64.lisp index 3a6d1f7..46a1d45 100644 --- a/base64.lisp +++ b/base64.lisp @@ -8,14 +8,20 @@ (defvar *base64url-encode-table* #.(coerce "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_" 'simple-base-string)) +(defvar *base64-format-descriptor* + (make-format-descriptor #'encoded-length/base64 + #'octets->string/base64 + #'octets->octets/base64 + #'decoded-length-base64 + #'string->octets/base64 + #'octets->octets/base64)) + (defstruct (base64-encode-state - (:include encode-state) (:copier nil) (:constructor make-base64-encode-state - (table - &aux (encoded-length #'encoded-length/base64) - (octets->octets #'octets->octets/base64) - (octets->string #'octets->string/base64)))) + (&aux (table *base64-encode-table*))) + (:constructor make-base64url-encode-state + (&aux (table *base64url-encode-table*)))) (bits 0 :type (unsigned-byte 16)) (n-bits 0 :type fixnum) (table *base64-encode-table* :read-only t :type (simple-array base-char (64))) From b4265c137ac1fc887dcb64bdcb93331230ab4a62 Mon Sep 17 00:00:00 2001 From: Nathan Froyd Date: Fri, 22 Jan 2010 14:05:57 -0500 Subject: [PATCH 25/76] move base16 functions into new FORMAT-DESCRIPTOR structure --- base16.lisp | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/base16.lisp b/base16.lisp index 96d2cba..7b635a3 100644 --- a/base16.lisp +++ b/base16.lisp @@ -11,14 +11,21 @@ (make-decode-table *base16-encode-table*)) (declaim (type decode-table *base16-decode-table*)) +(defvar *base16-format-descriptor + (make-format-descriptor #'encoded-length/base16 + #'octets->string/base16 + #'octets->octets/base16 + #'decoded-length-base16 + #'string->octets/base16 + #'octets->octets/base16)) + (defstruct (base16-encode-state (:include encode-state) (:copier nil) (:constructor make-base16-encode-state - (table - &aux (encoded-length #'encoded-length/base16) - (octets->octets #'octets->octets/base16) - (octets->string #'octets->string/base16)))) + (&aux (table *base16-encode-table*))) + (:constructor make-hex-encode-state + (&aux (table *hex-encode-table*)))) (bits 0 :type (unsigned-byte 8)) (n-bits 0 :type fixnum) (table *base16-encode-table* :read-only t From 7402c18516ac2754fa26aba138b0f202c90659a2 Mon Sep 17 00:00:00 2001 From: Nathan Froyd Date: Fri, 22 Jan 2010 14:06:50 -0500 Subject: [PATCH 26/76] move base85 functions into new FORMAT-DESCRIPTOR structure --- base85.lisp | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/base85.lisp b/base85.lisp index 5261913..f6763ac 100644 --- a/base85.lisp +++ b/base85.lisp @@ -5,14 +5,18 @@ (defvar *base85-encode-table* #.(coerce "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz!#$%&()*+-;<=>?@^_`{|}~" 'simple-base-string)) +(defvar *base85-format-descriptor* + (make-format-descriptor #'encoded-length/base85 + #'octets->string/base85 + #'octets->octets/base85 + #'decoded-length-base85 + #'string->octets/base85 + #'octets->octets/base85)) + (defstruct (base85-encode-state (:include encode-state) (:copier nil) - (:constructor make-base85-encode-state - (table - &aux (encoded-length #'encoded-length/base85) - (octets->octets #'octets->octets/base85) - (octets->string #'octets->string/base85)))) + (:constructor make-base85-encode-state)) ;; TODO: Clever hack for little-endian machines: fill in GROUP ;; back-to-front, using PENDING to count down, then use SBCL's ;; %VECTOR-RAW-BITS or similar to read out the group in proper From d1bc3327c696f0c91b73e0cb69ee53e2ac4d26a1 Mon Sep 17 00:00:00 2001 From: Nathan Froyd Date: Fri, 22 Jan 2010 14:08:35 -0500 Subject: [PATCH 27/76] missed a trailing * for base16 format descriptor --- base16.lisp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base16.lisp b/base16.lisp index 7b635a3..b59f042 100644 --- a/base16.lisp +++ b/base16.lisp @@ -11,7 +11,7 @@ (make-decode-table *base16-encode-table*)) (declaim (type decode-table *base16-decode-table*)) -(defvar *base16-format-descriptor +(defvar *base16-format-descriptor* (make-format-descriptor #'encoded-length/base16 #'octets->string/base16 #'octets->octets/base16 From 73019680813bfa6a10f984a6a922b25369c4e387 Mon Sep 17 00:00:00 2001 From: Nathan Froyd Date: Fri, 22 Jan 2010 14:21:13 -0500 Subject: [PATCH 28/76] fix typo in base85.lisp --- base85.lisp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base85.lisp b/base85.lisp index f6763ac..bc266b9 100644 --- a/base85.lisp +++ b/base85.lisp @@ -67,7 +67,7 @@ (declare (inline expand-for-output)) (tagbody PAD-CHECK - (when (base85-encoder-state-finished-input-p state) + (when (base85-encode-state-finished-input-p state) (go FLUSH-BITS)) INPUT-CHECK (when (>= input-index input-end) From 226ea02a6ec4866e87bdc24ecb679a2b92b7ddcc Mon Sep 17 00:00:00 2001 From: Nathan Froyd Date: Fri, 22 Jan 2010 14:26:28 -0500 Subject: [PATCH 29/76] fix wrong state transition for base85 --- base85.lisp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base85.lisp b/base85.lisp index bc266b9..7a7c244 100644 --- a/base85.lisp +++ b/base85.lisp @@ -127,7 +127,7 @@ (setf pending 0) (go RESTORE-STATE)) (t - (go OUTPUT-CHECK)))) + (go FLUSH-OUTPUT-CHECK)))) RESTORE-STATE (setf (base85-encode-state-bits state) bits (base85-encode-state-pending state) pending From 3752b95b51569db776f2354de3566fb547ecd3d9 Mon Sep 17 00:00:00 2001 From: Nathan Froyd Date: Fri, 22 Jan 2010 14:26:59 -0500 Subject: [PATCH 30/76] add ASCII85 format descriptor and encode state --- ascii85.lisp | 133 ++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 132 insertions(+), 1 deletion(-) diff --git a/ascii85.lisp b/ascii85.lisp index 7cd707a..83dfef6 100644 --- a/ascii85.lisp +++ b/ascii85.lisp @@ -5,7 +5,27 @@ (defvar *ascii85-encode-table* #.(coerce "!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstu" 'simple-base-string)) -(defun encoded-length-ascii85 (count) +(defvar *ascii85-format-descriptor* + (make-format-descriptor #'encoded-length/ascii85 + #'octets->string/ascii85 + #'octets->octets/ascii85 + #'decoded-length-ascii85 + #'string->octets/ascii85 + #'octets->octets/ascii85)) + +(defstruct (ascii85-encode-state + (:include encode-state) + (:copier nil) + (:constructor make-ascii85-encode-state)) + (bits 0 :type (unsigned-byte 32)) + (pending 0 :type (integer 0 4)) + (output-group (make-array 5 :element-type 'base-char) + :read-only t :type (simple-array base-char (5))) + (output-pending 0 :type (integer 0 5)) + (table *ascii85-encode-table* :read-only t + :type (simple-array base-car (85)))) + +(defun encoded-length/ascii85 (count) "Return the number of characters required to encode COUNT octets in Ascii85." (multiple-value-bind (q r) (truncate count 4) (let ((complete (* q 5))) @@ -13,6 +33,117 @@ complete (+ complete r 1))))) +(declaim (inline ascii85-encode)) +(defun ascii85-encoder (state output input + ouput-start output-end + input-start input-end lastp converter) + (declare (type ascii85-encode-state state)) + (declare (type simple-octet-vector input)) + (declare (type index output-start output-end input-start input-end)) + (declare (type function coverter)) + (let ((input-index input-start) + (output-index output-start) + (bits (ascii85-encode-state-bits state)) + (pending (ascii85-encode-state-pending)) + (output-group (ascii85-encode-state-output-group state)) + (output-pending (ascii85-encode-state-output-pending state))) + (declare (type index input-index output-index)) + (declare (type (unsigned-byte 32) bits)) + (declare (type (integer 0 4) pending)) + (declare (type (integer 0 5) output-pending)) + (flet ((expand-for-output (bits output-group) + (cond + ((zerop group) + (setf (aref output-group 0) #\z) + 1) + (t + (loop for i from 45 downto 0 + do (multiple-value-bind (b index) (truncate bits 85) + (setf bits b + (aref output-group i) + (code-char (+ #.(char-code #\!) index)))) + finally (return 5)))))) + (tagbody + PAD-CHECK + (when (ascii85-encode-state-finished-input-p state) + (go FLUSH-BITS)) + INPUT-CHECK + (when (>= input-index input-end) + (go DONE)) + DO-INPUT + (when (< pending 4) + (setf bits (ldb (byte 32 0) + (logior (ash (aref input input-index) + (- 24 (* pending 8))) + bits))) + (incf input-index) + (incf pending) + (go INPUT-CHECK)) + EXPAND-FOR-OUTPUT + (setf output-pending (expand-for-output bits output-group)) + OUTPUT-CHECK + (when (>= output-index output-end) + (go DONE)) + DO-OUTPUT + (when (> output-pending 0) + (setf (aref output output-index) + (funcall converter + (aref output-group (decf output-pending)))) + (incf output-index) + (cond + ((zerop output-pending) + (setf bits 0) + (setf pending 0) + (go INPUT-CHECK)) + (t + (go OUTPUT-CHECK)))) + DONE + (unless lastp + (go RESTORE-STATE)) + (setf (ascii85-encode-state-finished-input-p state) t) + (setf output-pending (expand-for-output bits output-group) + output-pending (1+ pending)) + FLUSH-BITS + (when (zerop output-pending) + (go RESTORE-STATE)) + FLUSH-OUTPUT-CHECK + (when (>= output-index output-end) + (go RESTORE-STATE)) + DO-FLUSH-OUTPUT + (when (> output-pending 0) + (setf (aref output output-index) + (funcall converter + (aref output-group (decf output-pending)))) + (incf output-index) + (cond + ((zerop output-pending) + (setf bits 0) + (setf pending 0) + (go RESTORE-STATE)) + (t + (go FLUSH-OUTPUT-CHECK)))) + RESTORE-STATE + (setf (ascii85-encode-state-bits state) bits + (ascii85-encode-state-pending state) pending + (ascii85-encode-state-output-pending state) output-pending)) + (values (- input-index input-start) (- output-index output-start))))) + +(defun octets->octets/ascii85 (state output input + output-start output-end + input-start input-end lastp) + (declare (type simple-octet-vector output)) + (declare (optimize speed)) + (ascii85-encoder state output input output-start output-end + input-start input-end lastp #'char-code)) + +(defun octets->string/ascii85 (state output input + output-start output-end + input-start input-end lastp) + (declare (type simple-string output)) + (declare (optimize speed)) + (ascii85-encoder state output input output-start output-end + input-start input-end lastp #'identity)) + (defun encode-octets-ascii85 (octets start end table writer) (declare (type (simple-array (unsigned-byte 8) (*)) octets)) (declare (type index start end)) From 008f4818ab1e0b6f2d1d1b2772e6bd21babd33bf Mon Sep 17 00:00:00 2001 From: Nathan Froyd Date: Sat, 23 Jan 2010 20:56:37 -0500 Subject: [PATCH 31/76] add a descriptor slot to {ENCODE,DECODE}-STATE --- types.lisp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/types.lisp b/types.lisp index f2d1733..fce8aaf 100644 --- a/types.lisp +++ b/types.lisp @@ -30,7 +30,13 @@ (string->octets (required-argument) :type function :read-only t) (octets->octets (required-argument) :type function :read-only t)) +(defstruct (state + (:copier nil) + (:constructor nil)) + (descriptor (required-argument) :type format-descriptor :read-only t)) + (defstruct (encode-state + (:include state) (:copier nil) (:constructor)) ;; LINE-BREAK describes after how many characters we should be @@ -42,6 +48,7 @@ (finished-input-p nil)) (defstruct (decode-state + (:include state) (:copier nil) (:constructor)) ) From 3ced1a48f1e0381e648d9ed23d64fcc177fbec27 Mon Sep 17 00:00:00 2001 From: Nathan Froyd Date: Sat, 23 Jan 2010 20:58:48 -0500 Subject: [PATCH 32/76] first cut at revamped DECODE-OCTETS --- octets.lisp | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/octets.lisp b/octets.lisp index c352d8d..f6213f0 100644 --- a/octets.lisp +++ b/octets.lisp @@ -211,6 +211,45 @@ any necessary padding required by FORMAT." (frob (encode-state-octets->string format-state))) ((array (unsigned-byte 8) (*)) (frob (encode-state-octets->octets format-state))))))) + +(defun decode-to-fresh-vector (string format-start start end) + (multiple-value-bind (input start end) + (array-data-and-offsets string start end) + (let* ((state (find-format-state format case-fold map01)) + (length (funcall (decode-state-decoded-length state) + (- end start)))) + (flet ((frob (v decode-fun) + (funcall decode-fun state v string 0 length start end))) + (let ((octets (make-array length :element-type '(unsigned-byte 8)))) + (etypecase string + (simple-string + (frob octets (decode-state-string->octets state))) + (simple-octet-vector + (frob octets (decode-state-octets->octets state))))))))) + +(defun decode-octets (destination string format &key (start 0) end + (output-start 0) output-end case-fold map01 finishp) + "Decode the characters of STRING between START and END into octets +according to FORMAT. DECODED-LENGTH indicates the number of decoded +octets to expect. DESTINATION may be NIL. + (let ((format-state (find-format format case-fold map01))) + (flet ((frob (decode-fun) + (multiple-value-bind (input input-start input-end) + (array-data-and-offsets string start end) + (multiple-value-bind (output output-start output-end) + (array-data-and-offsets destination output-start output-end) + (funcall decode-fun format-start + output string + output-start output-end + input-start input-end nil))))) + (declare (inline frob)) + (etypecase string + (null + (decode-to-fresh-vector string format-start start end)) + (string + (frob (decode-state-string->octets format-state))) + ((array (unsigned-byte 8) (*)) + (frob (decode-state-octets->octets format-state))))))) ||# (defun decode-octets (destination string format From acf9b228b103c032fdcd899cf06042fc1cf727dd Mon Sep 17 00:00:00 2001 From: Nathan Froyd Date: Sun, 24 Jan 2010 20:26:45 -0500 Subject: [PATCH 33/76] fix typo in ascii85.lisp --- ascii85.lisp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ascii85.lisp b/ascii85.lisp index 83dfef6..4816899 100644 --- a/ascii85.lisp +++ b/ascii85.lisp @@ -40,7 +40,7 @@ (declare (type ascii85-encode-state state)) (declare (type simple-octet-vector input)) (declare (type index output-start output-end input-start input-end)) - (declare (type function coverter)) + (declare (type function converter)) (let ((input-index input-start) (output-index output-start) (bits (ascii85-encode-state-bits state)) From 59fee6c433c2a8dd83d1d73f321fcd9f6c90d7a7 Mon Sep 17 00:00:00 2001 From: Nathan Froyd Date: Sun, 24 Jan 2010 20:27:10 -0500 Subject: [PATCH 34/76] fix format descriptor confusion --- types.lisp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/types.lisp b/types.lisp index fce8aaf..2b7a5eb 100644 --- a/types.lisp +++ b/types.lisp @@ -19,16 +19,17 @@ (defstruct (format-descriptor (:copier nil) (:constructor make-format-descriptor - (encoded-length octets->string octets->octets + (encoded-length octets->string + octets->octets/encode decoded-length string->octets - octets->string))) + octets->octets/decode))) (encoded-length (required-argument) :type function :read-only t) (octets->string (required-argument) :type function :read-only t) - (octets->octets (required-argument) :type function :read-only t) + (octets->octets/encode (required-argument) :type function :read-only t) (decoded-length (required-argument) :type function :read-only t) (string->octets (required-argument) :type function :read-only t) - (octets->octets (required-argument) :type function :read-only t)) + (octets->octets/decode (required-argument) :type function :read-only t)) (defstruct (state (:copier nil) From 20f85bd2b676b39727e1ae0905fc9084aea9d691 Mon Sep 17 00:00:00 2001 From: Nathan Froyd Date: Sun, 24 Jan 2010 20:28:40 -0500 Subject: [PATCH 35/76] fix variable typos in several files --- ascii85.lisp | 2 +- base16.lisp | 2 +- base85.lisp | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/ascii85.lisp b/ascii85.lisp index 4816899..293cb92 100644 --- a/ascii85.lisp +++ b/ascii85.lisp @@ -53,7 +53,7 @@ (declare (type (integer 0 5) output-pending)) (flet ((expand-for-output (bits output-group) (cond - ((zerop group) + ((zerop bits) (setf (aref output-group 0) #\z) 1) (t diff --git a/base16.lisp b/base16.lisp index b59f042..df3a6f4 100644 --- a/base16.lisp +++ b/base16.lisp @@ -52,7 +52,7 @@ (when (base16-encode-state-finished-input-p state) (go FLUSH-BITS)) INPUT-CHECK - (when (>= index-index input-end) + (when (>= input-index input-end) (go DONE)) DO-INPUT (when (zerop n-bits) diff --git a/base85.lisp b/base85.lisp index 7a7c244..fd08348 100644 --- a/base85.lisp +++ b/base85.lisp @@ -41,7 +41,7 @@ (declaim (inline base85-encode)) (defun base85-encoder (state output input - output-start ouput-end + output-start output-end input-start input-end lastp converter) (declare (type base85-encode-state state)) (declare (type simple-octet-vector input)) From d3068c668a847f56f02d804303c070362565d76e Mon Sep 17 00:00:00 2001 From: Nathan Froyd Date: Sun, 24 Jan 2010 20:29:47 -0500 Subject: [PATCH 36/76] {ENCODE,DECODE}-STATE shouldn't have constructors --- types.lisp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/types.lisp b/types.lisp index 2b7a5eb..b54ed25 100644 --- a/types.lisp +++ b/types.lisp @@ -39,7 +39,7 @@ (defstruct (encode-state (:include state) (:copier nil) - (:constructor)) + (:constructor nil)) ;; LINE-BREAK describes after how many characters we should be ;; inserting newlines into the encoded output. It is zero if we ;; should never insert newlines. @@ -51,5 +51,5 @@ (defstruct (decode-state (:include state) (:copier nil) - (:constructor)) + (:constructor nil)) ) From 10574db096c609c4dadb0c76a57e67089dc1396b Mon Sep 17 00:00:00 2001 From: Nathan Froyd Date: Sun, 24 Jan 2010 20:30:39 -0500 Subject: [PATCH 37/76] add certainly-needed FINISHED-INPUT-P field to DECODE-STATE --- types.lisp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/types.lisp b/types.lisp index b54ed25..0645c89 100644 --- a/types.lisp +++ b/types.lisp @@ -52,4 +52,6 @@ (:include state) (:copier nil) (:constructor nil)) - ) + ;; FINISHED-INPUT-P is either T or NIL depending on whether we have + ;; seen all of the input to be encoded. + (finished-input-p nil)) From 9bd7108e26fa005d6ca1f386dac918bbb916b3ef Mon Sep 17 00:00:00 2001 From: Nathan Froyd Date: Sun, 24 Jan 2010 20:41:21 -0500 Subject: [PATCH 38/76] convert base32 over to the new format descriptor scheme --- base32.lisp | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/base32.lisp b/base32.lisp index 9933e96..5059f00 100644 --- a/base32.lisp +++ b/base32.lisp @@ -7,19 +7,25 @@ (defvar *base32hex-encode-table* #.(coerce "0123456789ABCDEFGHIJKLMNOPQRSTUV" 'simple-base-string)) +(defvar *base32-format-descriptor* + (make-format-descriptor #'encoded-length/base32 + #'octets->string/base32 + #'octets->octets/encode/base32 + #'decoded-length-base32 + #'string->octets/base32 + #'octets->octets/decode/base32)) + (defstruct (base32-encode-state (:include encode-state) (:copier nil) (:constructor make-base32-encode-state - (table - &aux (encoded-length #'encoded-length/base32) - (octets->octets #'octets->octets/base32) - (octets->string #'octets->string/base32)))) + (&aux (table *base32-encode-table*))) + (:constructor make-base32hex-encode-state + (&aux (table *base32hex-encode-table*)))) (bits 0 :type (unsigned-byte 16)) (n-bits 0 :type fixnum) (table *base32-encode-table* :read-only t :type (simple-array base-char (32))) - (adding-padding-p nil) (padding-remaining 0 :type (integer 0 6))) (declaim (inline base32-encoder)) @@ -44,7 +50,7 @@ (declare (type (simple-array fixnum (5)) n-pad-chars)) (tagbody PAD-CHECK - (when (base32-encode-state-adding-padding-p state) + (when (base32-encode-state-finished-input-p state) (go PAD)) INPUT-CHECK (when (>= input-index input-end) @@ -69,7 +75,7 @@ DONE (unless lastp (go RESTORE-STATE)) - (setf (base32-encode-state-adding-padding-p state) t) + (setf (base32-encode-state-finished-input-p state) t) (setf (base32-encode-state-padding-remaining state) (aref n-pad-chars n-bits)) PAD From 84dc3ba752c10c5caaac2b0b3e08046593738ead Mon Sep 17 00:00:00 2001 From: Nathan Froyd Date: Sun, 24 Jan 2010 20:43:10 -0500 Subject: [PATCH 39/76] rename OCTETS->OCTETS methods appropriately --- ascii85.lisp | 10 +++++----- base16.lisp | 10 +++++----- base32.lisp | 6 +++--- base64.lisp | 10 +++++----- base85.lisp | 10 +++++----- 5 files changed, 23 insertions(+), 23 deletions(-) diff --git a/ascii85.lisp b/ascii85.lisp index 293cb92..469789f 100644 --- a/ascii85.lisp +++ b/ascii85.lisp @@ -8,10 +8,10 @@ (defvar *ascii85-format-descriptor* (make-format-descriptor #'encoded-length/ascii85 #'octets->string/ascii85 - #'octets->octets/ascii85 + #'octets->octets/encode/ascii85 #'decoded-length-ascii85 #'string->octets/ascii85 - #'octets->octets/ascii85)) + #'octets->octets/decode/ascii85)) (defstruct (ascii85-encode-state (:include encode-state) @@ -128,9 +128,9 @@ (ascii85-encode-state-output-pending state) output-pending)) (values (- input-index input-start) (- output-index output-start))))) -(defun octets->octets/ascii85 (state output input - output-start output-end - input-start input-end lastp) +(defun octets->octets/encode/ascii85 (state output input + output-start output-end + input-start input-end lastp) (declare (type simple-octet-vector output)) (declare (optimize speed)) (ascii85-encoder state output input output-start output-end diff --git a/base16.lisp b/base16.lisp index df3a6f4..2eee085 100644 --- a/base16.lisp +++ b/base16.lisp @@ -14,10 +14,10 @@ (defvar *base16-format-descriptor* (make-format-descriptor #'encoded-length/base16 #'octets->string/base16 - #'octets->octets/base16 + #'octets->octets/encode/base16 #'decoded-length-base16 #'string->octets/base16 - #'octets->octets/base16)) + #'octets->octets/decode/base16)) (defstruct (base16-encode-state (:include encode-state) @@ -96,9 +96,9 @@ "Return the number of characters required to encode COUNT octets in Base16." (* count 2)) -(defun octets->octets/base16 (state output input - output-start output-end - input-start input-end lastp) +(defun octets->octets/encode/base16 (state output input + output-start output-end + input-start input-end lastp) (declare (type simple-octet-vector output)) (base16-encoder state output input output-start output-end input-start input-end lastp #'char-code)) diff --git a/base32.lisp b/base32.lisp index 5059f00..4313953 100644 --- a/base32.lisp +++ b/base32.lisp @@ -106,9 +106,9 @@ (base32-encode-state-n-bits state) n-bits)) (values (- input-index input-start) (- output-index output-start)))) -(defun octets->octets/base32 (state output input - output-start output-end - input-start input-end lastp) +(defun octets->octets/encode/base32 (state output input + output-start output-end + input-start input-end lastp) (declare (type simple-octet-vector output)) (declare (optimize speed)) (base32-encoder state output input output-start output-end diff --git a/base64.lisp b/base64.lisp index 46a1d45..27bd363 100644 --- a/base64.lisp +++ b/base64.lisp @@ -11,10 +11,10 @@ (defvar *base64-format-descriptor* (make-format-descriptor #'encoded-length/base64 #'octets->string/base64 - #'octets->octets/base64 + #'octets->octets/encode/base64 #'decoded-length-base64 #'string->octets/base64 - #'octets->octets/base64)) + #'octets->octets/decode/base64)) (defstruct (base64-encode-state (:copier nil) @@ -133,9 +133,9 @@ (base64-encode-state-n-bits state) n-bits)) (values (- input-index input-start) (- output-index output-start)))) -(defun octets->octets/base64 (state output input - output-start output-end - input-start input-end lastp) +(defun octets->octets/encode/base64 (state output input + output-start output-end + input-start input-end lastp) (declare (type simple-octet-vector output)) (declare (optimize speed)) (base64-encoder state output input output-start output-end diff --git a/base85.lisp b/base85.lisp index fd08348..28f53ff 100644 --- a/base85.lisp +++ b/base85.lisp @@ -8,10 +8,10 @@ (defvar *base85-format-descriptor* (make-format-descriptor #'encoded-length/base85 #'octets->string/base85 - #'octets->octets/base85 + #'octets->octets/encode/base85 #'decoded-length-base85 #'string->octets/base85 - #'octets->octets/base85)) + #'octets->octets/decode/base85)) (defstruct (base85-encode-state (:include encode-state) @@ -134,9 +134,9 @@ (base85-encode-state-output-pending state) output-pending)) (values (- input-index input-start) (- output-index output-start))))) -(defun octets->octets/base85 (state output input - output-start output-end - input-start input-end lastp) +(defun octets->octets/encode/base85 (state output input + output-start output-end + input-start input-end lastp) (declare (type simple-octet-vector output)) (declare (optimize speed)) (base85-encoder state output input output-start output-end From f4270b1cabee288fea5bb1359de06b357e950c8a Mon Sep 17 00:00:00 2001 From: Nathan Froyd Date: Sun, 24 Jan 2010 21:02:06 -0500 Subject: [PATCH 40/76] fix typos and indentation in ascii85 --- ascii85.lisp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/ascii85.lisp b/ascii85.lisp index 469789f..561154f 100644 --- a/ascii85.lisp +++ b/ascii85.lisp @@ -35,7 +35,7 @@ (declaim (inline ascii85-encode)) (defun ascii85-encoder (state output input - ouput-start output-end + output-start output-end input-start input-end lastp converter) (declare (type ascii85-encode-state state)) (declare (type simple-octet-vector input)) @@ -134,15 +134,15 @@ (declare (type simple-octet-vector output)) (declare (optimize speed)) (ascii85-encoder state output input output-start output-end - input-start input-end lastp #'char-code)) + input-start input-end lastp #'char-code)) (defun octets->string/ascii85 (state output input - output-start output-end - input-start input-end lastp) + output-start output-end + input-start input-end lastp) (declare (type simple-string output)) (declare (optimize speed)) (ascii85-encoder state output input output-start output-end - input-start input-end lastp #'identity)) + input-start input-end lastp #'identity)) (defun encode-octets-ascii85 (octets start end table writer) (declare (type (simple-array (unsigned-byte 8) (*)) octets)) From eb99637cbb517d2d7986cf798346a7018062ec05 Mon Sep 17 00:00:00 2001 From: Nathan Froyd Date: Sun, 24 Jan 2010 21:03:06 -0500 Subject: [PATCH 41/76] fix format descriptors to be constructed lazily --- ascii85.lisp | 17 ++++++++++------- base16.lisp | 17 ++++++++++------- base32.lisp | 17 ++++++++++------- base64.lisp | 17 ++++++++++------- base85.lisp | 17 ++++++++++------- 5 files changed, 50 insertions(+), 35 deletions(-) diff --git a/ascii85.lisp b/ascii85.lisp index 561154f..c37eee1 100644 --- a/ascii85.lisp +++ b/ascii85.lisp @@ -5,13 +5,16 @@ (defvar *ascii85-encode-table* #.(coerce "!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstu" 'simple-base-string)) -(defvar *ascii85-format-descriptor* - (make-format-descriptor #'encoded-length/ascii85 - #'octets->string/ascii85 - #'octets->octets/encode/ascii85 - #'decoded-length-ascii85 - #'string->octets/ascii85 - #'octets->octets/decode/ascii85)) +(defun ascii85-format-descriptor () + (let ((fd (load-time-value nil))) + (if fd + fd + (setf fd (make-format-descriptor #'encoded-length/ascii85 + #'octets->string/ascii85 + #'octets->octets/encode/ascii85 + #'decoded-length-ascii85 + #'string->octets/ascii85 + #'octets->octets/decode/ascii85))))) (defstruct (ascii85-encode-state (:include encode-state) diff --git a/base16.lisp b/base16.lisp index 2eee085..21bb3ea 100644 --- a/base16.lisp +++ b/base16.lisp @@ -11,13 +11,16 @@ (make-decode-table *base16-encode-table*)) (declaim (type decode-table *base16-decode-table*)) -(defvar *base16-format-descriptor* - (make-format-descriptor #'encoded-length/base16 - #'octets->string/base16 - #'octets->octets/encode/base16 - #'decoded-length-base16 - #'string->octets/base16 - #'octets->octets/decode/base16)) +(defun base16-format-descriptor () + (let ((fd (load-time-value nil))) + (if fd + fd + (setf fd (make-format-descriptor #'encoded-length/base16 + #'octets->string/base16 + #'octets->octets/encode/base16 + #'decoded-length-base16 + #'string->octets/base16 + #'octets->octets/decode/base16))))) (defstruct (base16-encode-state (:include encode-state) diff --git a/base32.lisp b/base32.lisp index 4313953..ade341c 100644 --- a/base32.lisp +++ b/base32.lisp @@ -7,13 +7,16 @@ (defvar *base32hex-encode-table* #.(coerce "0123456789ABCDEFGHIJKLMNOPQRSTUV" 'simple-base-string)) -(defvar *base32-format-descriptor* - (make-format-descriptor #'encoded-length/base32 - #'octets->string/base32 - #'octets->octets/encode/base32 - #'decoded-length-base32 - #'string->octets/base32 - #'octets->octets/decode/base32)) +(defun base32-format-descriptor () + (let ((fd (load-time-value nil))) + (if fd + fd + (setf fd (make-format-descriptor #'encoded-length/base32 + #'octets->string/base32 + #'octets->octets/encode/base32 + #'decoded-length-base32 + #'string->octets/base32 + #'octets->octets/decode/base32))))) (defstruct (base32-encode-state (:include encode-state) diff --git a/base64.lisp b/base64.lisp index 27bd363..adf955e 100644 --- a/base64.lisp +++ b/base64.lisp @@ -8,13 +8,16 @@ (defvar *base64url-encode-table* #.(coerce "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_" 'simple-base-string)) -(defvar *base64-format-descriptor* - (make-format-descriptor #'encoded-length/base64 - #'octets->string/base64 - #'octets->octets/encode/base64 - #'decoded-length-base64 - #'string->octets/base64 - #'octets->octets/decode/base64)) +(defun base64-format-descriptor () + (let ((fd (load-time-value nil))) + (if fd + fd + (setf fd (make-format-descriptor #'encoded-length/base64 + #'octets->string/base64 + #'octets->octets/encode/base64 + #'decoded-length-base64 + #'string->octets/base64 + #'octets->octets/decode/base64))))) (defstruct (base64-encode-state (:copier nil) diff --git a/base85.lisp b/base85.lisp index 28f53ff..aaa96b6 100644 --- a/base85.lisp +++ b/base85.lisp @@ -5,13 +5,16 @@ (defvar *base85-encode-table* #.(coerce "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz!#$%&()*+-;<=>?@^_`{|}~" 'simple-base-string)) -(defvar *base85-format-descriptor* - (make-format-descriptor #'encoded-length/base85 - #'octets->string/base85 - #'octets->octets/encode/base85 - #'decoded-length-base85 - #'string->octets/base85 - #'octets->octets/decode/base85)) +(defun base85-format-descriptor () + (let ((fd (load-time-value nil))) + (if fd + fd + (setf fd (make-format-descriptor #'encoded-length/base85 + #'octets->string/base85 + #'octets->octets/encode/base85 + #'decoded-length-base85 + #'string->octets/base85 + #'octets->octets/decode/base85))))) (defstruct (base85-encode-state (:include encode-state) From 59f3abd0c0dffff44970d9709e7fc07e9e122675 Mon Sep 17 00:00:00 2001 From: Nathan Froyd Date: Sun, 24 Jan 2010 21:04:51 -0500 Subject: [PATCH 42/76] fix state constructors to pass format descriptors --- ascii85.lisp | 3 ++- base16.lisp | 6 ++++-- base32.lisp | 6 ++++-- base64.lisp | 6 ++++-- base85.lisp | 3 ++- 5 files changed, 16 insertions(+), 8 deletions(-) diff --git a/ascii85.lisp b/ascii85.lisp index c37eee1..4789052 100644 --- a/ascii85.lisp +++ b/ascii85.lisp @@ -19,7 +19,8 @@ (defstruct (ascii85-encode-state (:include encode-state) (:copier nil) - (:constructor make-ascii85-encode-state)) + (:constructor make-ascii85-encode-state + (&aux (descriptor (ascii85-format-descriptor))))) (bits 0 :type (unsigned-byte 32)) (pending 0 :type (integer 0 4)) (output-group (make-array 5 :element-type 'base-char) diff --git a/base16.lisp b/base16.lisp index 21bb3ea..d7efce8 100644 --- a/base16.lisp +++ b/base16.lisp @@ -26,9 +26,11 @@ (:include encode-state) (:copier nil) (:constructor make-base16-encode-state - (&aux (table *base16-encode-table*))) + (&aux (descriptor (base16-format-descriptor)) + (table *base16-encode-table*))) (:constructor make-hex-encode-state - (&aux (table *hex-encode-table*)))) + (&aux (descriptor (base16-format-descriptor)) + (table *hex-encode-table*)))) (bits 0 :type (unsigned-byte 8)) (n-bits 0 :type fixnum) (table *base16-encode-table* :read-only t diff --git a/base32.lisp b/base32.lisp index ade341c..662eace 100644 --- a/base32.lisp +++ b/base32.lisp @@ -22,9 +22,11 @@ (:include encode-state) (:copier nil) (:constructor make-base32-encode-state - (&aux (table *base32-encode-table*))) + (&aux (descriptor (base32-format-descriptor)) + (table *base32-encode-table*))) (:constructor make-base32hex-encode-state - (&aux (table *base32hex-encode-table*)))) + (&aux (descriptor (base32-format-descriptor)) + (table *base32hex-encode-table*)))) (bits 0 :type (unsigned-byte 16)) (n-bits 0 :type fixnum) (table *base32-encode-table* :read-only t diff --git a/base64.lisp b/base64.lisp index adf955e..3e652ee 100644 --- a/base64.lisp +++ b/base64.lisp @@ -22,9 +22,11 @@ (defstruct (base64-encode-state (:copier nil) (:constructor make-base64-encode-state - (&aux (table *base64-encode-table*))) + (&aux (descriptor (base64-format-descriptor)) + (table *base64-encode-table*))) (:constructor make-base64url-encode-state - (&aux (table *base64url-encode-table*)))) + (&aux (descriptor (base64-format-descriptor)) + (table *base64url-encode-table*)))) (bits 0 :type (unsigned-byte 16)) (n-bits 0 :type fixnum) (table *base64-encode-table* :read-only t :type (simple-array base-char (64))) diff --git a/base85.lisp b/base85.lisp index aaa96b6..ee3565a 100644 --- a/base85.lisp +++ b/base85.lisp @@ -19,7 +19,8 @@ (defstruct (base85-encode-state (:include encode-state) (:copier nil) - (:constructor make-base85-encode-state)) + (:constructor make-base85-encode-state + (&aux (descriptor (base85-format-descriptor))))) ;; TODO: Clever hack for little-endian machines: fill in GROUP ;; back-to-front, using PENDING to count down, then use SBCL's ;; %VECTOR-RAW-BITS or similar to read out the group in proper From fed2ba93957fc3b056b74dd584b140bfa9ec99e2 Mon Sep 17 00:00:00 2001 From: Nathan Froyd Date: Sun, 24 Jan 2010 22:16:39 -0500 Subject: [PATCH 43/76] fix bogus uses of LOAD-TIME-VALUE That's what I get for attempting to be clever. --- ascii85.lisp | 16 +++++++++------- base16.lisp | 16 +++++++++------- base32.lisp | 16 +++++++++------- base64.lisp | 16 +++++++++------- base85.lisp | 16 +++++++++------- 5 files changed, 45 insertions(+), 35 deletions(-) diff --git a/ascii85.lisp b/ascii85.lisp index 4789052..09f7e0c 100644 --- a/ascii85.lisp +++ b/ascii85.lisp @@ -6,15 +6,17 @@ #.(coerce "!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstu" 'simple-base-string)) (defun ascii85-format-descriptor () - (let ((fd (load-time-value nil))) + (let ((cell (load-time-value (list nil))) + (fd (car cell))) (if fd fd - (setf fd (make-format-descriptor #'encoded-length/ascii85 - #'octets->string/ascii85 - #'octets->octets/encode/ascii85 - #'decoded-length-ascii85 - #'string->octets/ascii85 - #'octets->octets/decode/ascii85))))) + (setf (car cell) + (make-format-descriptor #'encoded-length/ascii85 + #'octets->string/ascii85 + #'octets->octets/encode/ascii85 + #'decoded-length-ascii85 + #'string->octets/ascii85 + #'octets->octets/decode/ascii85))))) (defstruct (ascii85-encode-state (:include encode-state) diff --git a/base16.lisp b/base16.lisp index d7efce8..5b48226 100644 --- a/base16.lisp +++ b/base16.lisp @@ -12,15 +12,17 @@ (declaim (type decode-table *base16-decode-table*)) (defun base16-format-descriptor () - (let ((fd (load-time-value nil))) + (let ((cell (load-time-value (list nil))) + (fd (car cell))) (if fd fd - (setf fd (make-format-descriptor #'encoded-length/base16 - #'octets->string/base16 - #'octets->octets/encode/base16 - #'decoded-length-base16 - #'string->octets/base16 - #'octets->octets/decode/base16))))) + (setf (car cell) + (make-format-descriptor #'encoded-length/base16 + #'octets->string/base16 + #'octets->octets/encode/base16 + #'decoded-length-base16 + #'string->octets/base16 + #'octets->octets/decode/base16))))) (defstruct (base16-encode-state (:include encode-state) diff --git a/base32.lisp b/base32.lisp index 662eace..2906d3b 100644 --- a/base32.lisp +++ b/base32.lisp @@ -8,15 +8,17 @@ #.(coerce "0123456789ABCDEFGHIJKLMNOPQRSTUV" 'simple-base-string)) (defun base32-format-descriptor () - (let ((fd (load-time-value nil))) + (let ((cell (load-time-value (list nil))) + (fd (car cell))) (if fd fd - (setf fd (make-format-descriptor #'encoded-length/base32 - #'octets->string/base32 - #'octets->octets/encode/base32 - #'decoded-length-base32 - #'string->octets/base32 - #'octets->octets/decode/base32))))) + (setf (car cell) + (make-format-descriptor #'encoded-length/base32 + #'octets->string/base32 + #'octets->octets/encode/base32 + #'decoded-length-base32 + #'string->octets/base32 + #'octets->octets/decode/base32))))) (defstruct (base32-encode-state (:include encode-state) diff --git a/base64.lisp b/base64.lisp index 3e652ee..15ae7f5 100644 --- a/base64.lisp +++ b/base64.lisp @@ -9,15 +9,17 @@ #.(coerce "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_" 'simple-base-string)) (defun base64-format-descriptor () - (let ((fd (load-time-value nil))) + (let ((cell (load-time-value (list nil))) + (fd (car cell))) (if fd fd - (setf fd (make-format-descriptor #'encoded-length/base64 - #'octets->string/base64 - #'octets->octets/encode/base64 - #'decoded-length-base64 - #'string->octets/base64 - #'octets->octets/decode/base64))))) + (setf (car cell) + (make-format-descriptor #'encoded-length/base64 + #'octets->string/base64 + #'octets->octets/encode/base64 + #'decoded-length-base64 + #'string->octets/base64 + #'octets->octets/decode/base64))))) (defstruct (base64-encode-state (:copier nil) diff --git a/base85.lisp b/base85.lisp index ee3565a..fdc82a4 100644 --- a/base85.lisp +++ b/base85.lisp @@ -6,15 +6,17 @@ #.(coerce "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz!#$%&()*+-;<=>?@^_`{|}~" 'simple-base-string)) (defun base85-format-descriptor () - (let ((fd (load-time-value nil))) + (let ((cell (load-time-value (list nil))) + (fd (car cell))) (if fd fd - (setf fd (make-format-descriptor #'encoded-length/base85 - #'octets->string/base85 - #'octets->octets/encode/base85 - #'decoded-length-base85 - #'string->octets/base85 - #'octets->octets/decode/base85))))) + (setf (car cell) + (make-format-descriptor #'encoded-length/base85 + #'octets->string/base85 + #'octets->octets/encode/base85 + #'decoded-length-base85 + #'string->octets/base85 + #'octets->octets/decode/base85))))) (defstruct (base85-encode-state (:include encode-state) From 548855d29d11a28ae0e39b7b12685fa325ca52ae Mon Sep 17 00:00:00 2001 From: Nathan Froyd Date: Fri, 29 Jan 2010 21:00:44 -0500 Subject: [PATCH 44/76] change LET to LET* to fix compilation errors --- ascii85.lisp | 4 ++-- base16.lisp | 4 ++-- base32.lisp | 4 ++-- base64.lisp | 4 ++-- base85.lisp | 4 ++-- 5 files changed, 10 insertions(+), 10 deletions(-) diff --git a/ascii85.lisp b/ascii85.lisp index 09f7e0c..08253b9 100644 --- a/ascii85.lisp +++ b/ascii85.lisp @@ -6,8 +6,8 @@ #.(coerce "!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstu" 'simple-base-string)) (defun ascii85-format-descriptor () - (let ((cell (load-time-value (list nil))) - (fd (car cell))) + (let* ((cell (load-time-value (list nil))) + (fd (car cell))) (if fd fd (setf (car cell) diff --git a/base16.lisp b/base16.lisp index 5b48226..86cf14b 100644 --- a/base16.lisp +++ b/base16.lisp @@ -12,8 +12,8 @@ (declaim (type decode-table *base16-decode-table*)) (defun base16-format-descriptor () - (let ((cell (load-time-value (list nil))) - (fd (car cell))) + (let* ((cell (load-time-value (list nil))) + (fd (car cell))) (if fd fd (setf (car cell) diff --git a/base32.lisp b/base32.lisp index 2906d3b..664cd17 100644 --- a/base32.lisp +++ b/base32.lisp @@ -8,8 +8,8 @@ #.(coerce "0123456789ABCDEFGHIJKLMNOPQRSTUV" 'simple-base-string)) (defun base32-format-descriptor () - (let ((cell (load-time-value (list nil))) - (fd (car cell))) + (let* ((cell (load-time-value (list nil))) + (fd (car cell))) (if fd fd (setf (car cell) diff --git a/base64.lisp b/base64.lisp index 15ae7f5..8d88e77 100644 --- a/base64.lisp +++ b/base64.lisp @@ -9,8 +9,8 @@ #.(coerce "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_" 'simple-base-string)) (defun base64-format-descriptor () - (let ((cell (load-time-value (list nil))) - (fd (car cell))) + (let* ((cell (load-time-value (list nil))) + (fd (car cell))) (if fd fd (setf (car cell) diff --git a/base85.lisp b/base85.lisp index fdc82a4..4aeb1a6 100644 --- a/base85.lisp +++ b/base85.lisp @@ -6,8 +6,8 @@ #.(coerce "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz!#$%&()*+-;<=>?@^_`{|}~" 'simple-base-string)) (defun base85-format-descriptor () - (let ((cell (load-time-value (list nil))) - (fd (car cell))) + (let* ((cell (load-time-value (list nil))) + (fd (car cell))) (if fd fd (setf (car cell) From 4aa54d248ac7bc0e5dd04df172e6f72cdc23c505 Mon Sep 17 00:00:00 2001 From: Nathan Froyd Date: Fri, 29 Jan 2010 21:03:55 -0500 Subject: [PATCH 45/76] add :CONC-NAME to FORMAT-DESCRIPTOR --- types.lisp | 1 + 1 file changed, 1 insertion(+) diff --git a/types.lisp b/types.lisp index 0645c89..00d4e3d 100644 --- a/types.lisp +++ b/types.lisp @@ -17,6 +17,7 @@ (error "Required argument not provided")) (defstruct (format-descriptor + (:conc-name fd-) (:copier nil) (:constructor make-format-descriptor (encoded-length octets->string From 3cf3b01945bf5a7a22b275c7fa5bdc5e725293f8 Mon Sep 17 00:00:00 2001 From: Nathan Froyd Date: Fri, 29 Jan 2010 21:11:43 -0500 Subject: [PATCH 46/76] clean up experimental commented-out code in octets.lisp --- octets.lisp | 43 ++++++++++++++++++++++++++----------------- 1 file changed, 26 insertions(+), 17 deletions(-) diff --git a/octets.lisp b/octets.lisp index f6213f0..509df40 100644 --- a/octets.lisp +++ b/octets.lisp @@ -154,8 +154,11 @@ octet vector with a fill pointer." (defun encode-to-fresh-vector (octets format start end element-type) (multiple-value-bind (input start end) (array-data-and-offsets octets start end) - (let* ((state (find-format-state format)) - (length (funcall (encode-state-encoded-length state) (- end start)))) + (let* ((state (find-encoder format)) + (fd (state-descriptor state)) + (length (funcall (fd-encoded-length fd) (- end start)))) + (declare (type encode-state state)) + (declare (type format-descriptor fd)) (flet ((frob (etype encode-fun) (let ((v (make-array length :element-type etype))) (funcall encode-fun state v octets @@ -164,11 +167,11 @@ octet vector with a fill pointer." (declare (inline frob)) (ecase (canonical-element-type element-type) (character - (frob 'character (encode-state-octets->string state))) + (frob 'character (fd-octets->string fd))) (base-char - (frob 'base-char (encode-state-octets->string state))) + (frob 'base-char (fd-octets->string fd))) (ub8 - (frob '(unsigned-byte 8) (encode-state-octets->octets state)))))))) + (frob '(unsigned-byte 8) (fd-octets->octets/encode fd)))))))) (defun encode (octets format &key (start 0) end (element-type 'base-char)) (encode-to-fresh-vector octets format start end element-type)) @@ -193,7 +196,10 @@ written are returned as multiple values. ELEMENT-TYPE is ignored. If FINISHP is true, then in addition to any encoding of OCTETS, also output any necessary padding required by FORMAT." - (let ((format-state (find-format format))) + (let ((state (find-encoder format)) + (fd (state-descriptor state))) + (declare (type encode-state state)) + (declare (type format-descriptor fd)) (flet ((frob (encode-fun) (multiple-value-bind (input input-start input-end) (array-data-and-offsets octets start end) @@ -206,33 +212,36 @@ any necessary padding required by FORMAT." (declare (inline frob)) (etypecase destination (null - (encode-to-fresh-vector octets format-state start end element-type)) + (encode-to-fresh-vector octets state start end element-type)) (string - (frob (encode-state-octets->string format-state))) + (frob (fd-octets->string (state-descriptor state)))) ((array (unsigned-byte 8) (*)) - (frob (encode-state-octets->octets format-state))))))) + (frob (fd-octets->octets/encode (state-descriptor state)))))))) (defun decode-to-fresh-vector (string format-start start end) (multiple-value-bind (input start end) (array-data-and-offsets string start end) (let* ((state (find-format-state format case-fold map01)) - (length (funcall (decode-state-decoded-length state) - (- end start)))) + (fd (state-descriptor state)) + (length (funcall (fd-decoded-length fd) (- end start)))) + (declare (type decode-state state)) + (declare (type format-descriptor fd)) (flet ((frob (v decode-fun) (funcall decode-fun state v string 0 length start end))) (let ((octets (make-array length :element-type '(unsigned-byte 8)))) (etypecase string (simple-string - (frob octets (decode-state-string->octets state))) + (frob octets (fd-string->octets fd))) (simple-octet-vector - (frob octets (decode-state-octets->octets state))))))))) + (frob octets (fd-octets->octets/decode fd))))))))) (defun decode-octets (destination string format &key (start 0) end (output-start 0) output-end case-fold map01 finishp) "Decode the characters of STRING between START and END into octets according to FORMAT. DECODED-LENGTH indicates the number of decoded -octets to expect. DESTINATION may be NIL. - (let ((format-state (find-format format case-fold map01))) +octets to expect. DESTINATION may be NIL." + (let ((state (find-format format case-fold map01))) + (declare (type decode-state state)) (flet ((frob (decode-fun) (multiple-value-bind (input input-start input-end) (array-data-and-offsets string start end) @@ -247,9 +256,9 @@ octets to expect. DESTINATION may be NIL. (null (decode-to-fresh-vector string format-start start end)) (string - (frob (decode-state-string->octets format-state))) + (frob (fd-string->octets (state-descriptor state))) ((array (unsigned-byte 8) (*)) - (frob (decode-state-octets->octets format-state))))))) + (frob (fd-octets->octets/decode (state-descriptor state)))))))))g ||# (defun decode-octets (destination string format From d1ab94ea6091ec959b8098933fc78d04522e3a8d Mon Sep 17 00:00:00 2001 From: Nathan Froyd Date: Fri, 29 Jan 2010 23:44:49 -0500 Subject: [PATCH 47/76] add format.lisp to flesh out encoding/decoding bits --- binascii.asd | 3 ++- format.lisp | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+), 1 deletion(-) create mode 100644 format.lisp diff --git a/binascii.asd b/binascii.asd index 5d3b9ec..e39bc88 100644 --- a/binascii.asd +++ b/binascii.asd @@ -13,7 +13,8 @@ :components ((:static-file "LICENSE") (:file "package") (:file "types" :depends-on ("package")) - (:file "octets" :depends-on ("types")) + (:file "format" :depends-on ("types")) + (:file "octets" :depends-on ("types" "format")) (:file "ascii85" :depends-on ("octets")) (:file "base85" :depends-on ("octets")) (:file "base64" :depends-on ("octets")) diff --git a/format.lisp b/format.lisp new file mode 100644 index 0000000..86fc234 --- /dev/null +++ b/format.lisp @@ -0,0 +1,50 @@ +;;;; format.lisp -- a central repository for encoding formats and accessors + +(cl:in-package :binascii) + +(defvar *format-descriptors* (make-hash-table)) + +(defvar *format-state-constructors* (make-hash-table)) + +(defun unknown-format-error (format) + (error "Unknown format ~A" format)) + +(defun find-descriptor-for-format-or-lose (format) + (or (gethash format *format-descriptors*) + (unknown-format-error format))) + +(defun find-encode-state-constructor-or-lose (format) + (or (car (gethash format *format-state-constructors*)) + (unknown-format-error format))) + +(defun find-decode-state-constructor-or-lose (format) + (or (cdr (gethash format *format-state-constructors*)) + (unknown-format-error format))) + +(defun register-descriptor-and-constructors (format + descriptor + encoder-constructor + decoder-constructor) + (setf (gethash format *format-descriptors*) descriptor) + (setf (gethash format *format-state-constructors*) + (cons encoder-constructor decoder-constructor)) + format) + +(defun find-encoder (format) + "Return the appropriate ENCODE-STATE for FORMAT." + (etypecase format + (symbol + (let ((constructor (find-encode-state-constructor-or-lose format))) + (funcall (the function constructor)))) + (encode-state + format))) + +(defun find-decoder (format case-fold map01) + "Return the appropriate DECODE-STATE for FORMAT. If FORMAT is a symbol, +use CASE-FOLD and MAP01 to parameterize the returned decoder." + (etypecase format + (symbol + (let ((constructor (find-decode-state-constructor-or-lose format))) + (funcall (the function constructor) case-fold map01))) + (decode-state + format))) From b2fea4d7ccd6e68dbb344f2f02e9e77b5d64036a Mon Sep 17 00:00:00 2001 From: Nathan Froyd Date: Sat, 30 Jan 2010 13:55:42 -0500 Subject: [PATCH 48/76] ensure that ENCODE-TO-FRESH-VECTOR receives an ENCODE-STATE --- octets.lisp | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/octets.lisp b/octets.lisp index 509df40..fb76213 100644 --- a/octets.lisp +++ b/octets.lisp @@ -151,13 +151,12 @@ octet vector with a fill pointer." (values v start (or end (length v)))) #|| -(defun encode-to-fresh-vector (octets format start end element-type) +(defun encode-to-fresh-vector (octets state start end element-type) + (declare (type encode-state state)) (multiple-value-bind (input start end) (array-data-and-offsets octets start end) - (let* ((state (find-encoder format)) - (fd (state-descriptor state)) + (let* ((fd (state-descriptor state)) (length (funcall (fd-encoded-length fd) (- end start)))) - (declare (type encode-state state)) (declare (type format-descriptor fd)) (flet ((frob (etype encode-fun) (let ((v (make-array length :element-type etype))) @@ -174,7 +173,7 @@ octet vector with a fill pointer." (frob '(unsigned-byte 8) (fd-octets->octets/encode fd)))))))) (defun encode (octets format &key (start 0) end (element-type 'base-char)) - (encode-to-fresh-vector octets format start end element-type)) + (encode-to-fresh-vector octets (find-encoder format) start end element-type)) (defun encode-octets (destination octets format &key (start 0) end (output-start 0) output-end (element-type 'base-char) From 06e7f40fed7db59476c93db7c6d15de8b189226d Mon Sep 17 00:00:00 2001 From: Nathan Froyd Date: Sat, 30 Jan 2010 13:56:00 -0500 Subject: [PATCH 49/76] fix up decoding interface to match encoding interface --- octets.lisp | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/octets.lisp b/octets.lisp index fb76213..fbd7522 100644 --- a/octets.lisp +++ b/octets.lisp @@ -217,13 +217,12 @@ any necessary padding required by FORMAT." ((array (unsigned-byte 8) (*)) (frob (fd-octets->octets/encode (state-descriptor state)))))))) -(defun decode-to-fresh-vector (string format-start start end) +(defun decode-to-fresh-vector (string state start end) + (declare (type decode-state state)) (multiple-value-bind (input start end) (array-data-and-offsets string start end) - (let* ((state (find-format-state format case-fold map01)) - (fd (state-descriptor state)) + (let* ((fd (state-descriptor state)) (length (funcall (fd-decoded-length fd) (- end start)))) - (declare (type decode-state state)) (declare (type format-descriptor fd)) (flet ((frob (v decode-fun) (funcall decode-fun state v string 0 length start end))) @@ -234,12 +233,16 @@ any necessary padding required by FORMAT." (simple-octet-vector (frob octets (fd-octets->octets/decode fd))))))))) +(defun decode (string format &key (start 0) end case-fold map01) + (decode-to-fresh-vector string (find-decoder format case-fold map01) + start end)) + (defun decode-octets (destination string format &key (start 0) end (output-start 0) output-end case-fold map01 finishp) "Decode the characters of STRING between START and END into octets according to FORMAT. DECODED-LENGTH indicates the number of decoded octets to expect. DESTINATION may be NIL." - (let ((state (find-format format case-fold map01))) + (let ((state (find-decoder format case-fold map01))) (declare (type decode-state state)) (flet ((frob (decode-fun) (multiple-value-bind (input input-start input-end) @@ -253,7 +256,7 @@ octets to expect. DESTINATION may be NIL." (declare (inline frob)) (etypecase string (null - (decode-to-fresh-vector string format-start start end)) + (decode-to-fresh-vector string state start end)) (string (frob (fd-string->octets (state-descriptor state))) ((array (unsigned-byte 8) (*)) From 68848cc628b94ed771b795543b79d34e1003ccef Mon Sep 17 00:00:00 2001 From: Nathan Froyd Date: Sat, 30 Jan 2010 14:11:50 -0500 Subject: [PATCH 50/76] fix BASE-CAR typo in ascii85 --- ascii85.lisp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ascii85.lisp b/ascii85.lisp index 08253b9..95b3977 100644 --- a/ascii85.lisp +++ b/ascii85.lisp @@ -29,7 +29,7 @@ :read-only t :type (simple-array base-char (5))) (output-pending 0 :type (integer 0 5)) (table *ascii85-encode-table* :read-only t - :type (simple-array base-car (85)))) + :type (simple-array base-char (85)))) (defun encoded-length/ascii85 (count) "Return the number of characters required to encode COUNT octets in Ascii85." From 7ac97689956d965bd0ff416b85731ef805cb1438 Mon Sep 17 00:00:00 2001 From: Nathan Froyd Date: Sat, 30 Jan 2010 14:12:07 -0500 Subject: [PATCH 51/76] fix call to ASCII85-ENCODE-STATE-PENDING --- ascii85.lisp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ascii85.lisp b/ascii85.lisp index 95b3977..ebc910a 100644 --- a/ascii85.lisp +++ b/ascii85.lisp @@ -50,7 +50,7 @@ (let ((input-index input-start) (output-index output-start) (bits (ascii85-encode-state-bits state)) - (pending (ascii85-encode-state-pending)) + (pending (ascii85-encode-state-pending state)) (output-group (ascii85-encode-state-output-group state)) (output-pending (ascii85-encode-state-output-pending state))) (declare (type index input-index output-index)) From 340b6b83699963122a7f876f904b6345171ffae9 Mon Sep 17 00:00:00 2001 From: Nathan Froyd Date: Sat, 30 Jan 2010 14:12:26 -0500 Subject: [PATCH 52/76] make BASE64-ENCODE-STATE :INCLUDE ENCODE-STATE --- base64.lisp | 1 + 1 file changed, 1 insertion(+) diff --git a/base64.lisp b/base64.lisp index 8d88e77..c6470f1 100644 --- a/base64.lisp +++ b/base64.lisp @@ -22,6 +22,7 @@ #'octets->octets/decode/base64))))) (defstruct (base64-encode-state + (:include encode-state) (:copier nil) (:constructor make-base64-encode-state (&aux (descriptor (base64-format-descriptor)) From 886ab6e0046972bce16b888f883025c2fbe532e2 Mon Sep 17 00:00:00 2001 From: Nathan Froyd Date: Sat, 30 Jan 2010 14:14:03 -0500 Subject: [PATCH 53/76] fix ENCODING-TOOLS for ascii85 --- ascii85.lisp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ascii85.lisp b/ascii85.lisp index ebc910a..92b597a 100644 --- a/ascii85.lisp +++ b/ascii85.lisp @@ -181,7 +181,7 @@ (output buffer group count))))) (defmethod encoding-tools ((format (eql :ascii85))) - (values #'encode-octets-ascii85 #'encoded-length-ascii85 + (values #'encode-octets-ascii85 #'encoded-length/ascii85 *ascii85-encode-table*)) (defvar *ascii85-decode-table* (make-decode-table *ascii85-encode-table*)) From f4d2620141a10f2b454ccb183365c86dcaa3d633 Mon Sep 17 00:00:00 2001 From: Nathan Froyd Date: Sat, 30 Jan 2010 14:22:14 -0500 Subject: [PATCH 54/76] flip to the new encoding scheme Use dummy constructors for the decoding bits, since we're not using the new scheme there yet. base85 tests do not yet pass. --- ascii85.lisp | 40 +++++------------------- base16.lisp | 27 +++++++---------- base32.lisp | 53 ++++++++------------------------ base64.lisp | 41 ++++++++----------------- base85.lisp | 34 ++++++--------------- octets.lisp | 86 ++++++---------------------------------------------- 6 files changed, 63 insertions(+), 218 deletions(-) diff --git a/ascii85.lisp b/ascii85.lisp index 92b597a..4966cea 100644 --- a/ascii85.lisp +++ b/ascii85.lisp @@ -150,39 +150,11 @@ (ascii85-encoder state output input output-start output-end input-start input-end lastp #'identity)) -(defun encode-octets-ascii85 (octets start end table writer) - (declare (type (simple-array (unsigned-byte 8) (*)) octets)) - (declare (type index start end)) - (declare (type function writer)) - (declare (ignore table)) - (flet ((output (buffer group count) - (if (zerop group) - (funcall writer #\z) - (loop for i from 4 downto 0 - do (multiple-value-bind (g b) (truncate group 85) - (setf group g - (aref buffer i) (code-char (+ #.(char-code #\!) b)))) - finally (dotimes (i (1+ count)) - (funcall writer (aref buffer i))))))) - (loop with length = (- end start) - with buffer = (make-string 5) - with group of-type (unsigned-byte 32) = 0 - with count of-type fixnum = 0 - with shift of-type fixnum = 24 - until (zerop length) - do (setf group (logior group (ash (aref octets start) shift))) - (incf start) - (decf length) - (decf shift 8) - (when (= (incf count) 4) - (output buffer group count) - (setf group 0 count 0 shift 24)) - finally (unless (zerop count) - (output buffer group count))))) +(defun string->octets/ascii85 () + ) -(defmethod encoding-tools ((format (eql :ascii85))) - (values #'encode-octets-ascii85 #'encoded-length/ascii85 - *ascii85-encode-table*)) +(defun octets->octets/decode/ascii85 () + ) (defvar *ascii85-decode-table* (make-decode-table *ascii85-encode-table*)) (declaim (type decode-table *ascii85-decode-table*)) @@ -238,3 +210,7 @@ (declare (ignorable case-fold map01)) (values #'decode-octets-ascii85 #'decoded-length-ascii85 *ascii85-decode-table*)) + +(register-descriptor-and-constructors :ascii85 (ascii85-format-descriptor) + #'make-ascii85-encode-state + #'make-ascii85-encode-state) diff --git a/base16.lisp b/base16.lisp index 86cf14b..6fddae8 100644 --- a/base16.lisp +++ b/base16.lisp @@ -117,23 +117,11 @@ (base16-encoder state output input output-start output-end input-start input-end lastp #'identity)) -(defun encode-octets-base16 (octets start end table writer) - (declare (type (simple-array (unsigned-byte 8) (*)) octets)) - (declare (type index start end)) - (declare (type function writer)) - (declare (type (simple-array base-char (16)) table)) - (loop for i from start below end - do (let ((byte (aref octets i))) - (funcall writer (aref table (ldb (byte 4 4) byte))) - (funcall writer (aref table (ldb (byte 4 0) byte)))))) +(defun string->octets/base16 () + ) -(defmethod encoding-tools ((format (eql :base16))) - (values #'encode-octets-base16 #'encoded-length/base16 - *base16-encode-table*)) - -(defmethod encoding-tools ((format (eql :hex))) - (values #'encode-octets-base16 #'encoded-length/base16 - *hex-encode-table*)) +(defun octets->octets/decode/base16 () + ) (defun decode-octets-base16 (string start end length table writer) (declare (type index start end)) @@ -174,3 +162,10 @@ #'decoded-length-base16 (case-fold-decode-table *base16-decode-table* *base16-encode-table*))) + +(register-descriptor-and-constructors :base16 (base16-format-descriptor) + #'make-base16-encode-state + #'make-base16-encode-state) +(register-descriptor-and-constructors :hex (base16-format-descriptor) + #'make-hex-encode-state + #'make-hex-encode-state) diff --git a/base32.lisp b/base32.lisp index 664cd17..fd74ee5 100644 --- a/base32.lisp +++ b/base32.lisp @@ -129,52 +129,16 @@ (base32-encoder state output input output-start output-end input-start input-end lastp #'identity)) -(defun encode-octets-base32 (octets start end table writer) - (declare (type (simple-array (unsigned-byte 8) (*)) octets)) - (declare (type index start end)) - (declare (type function writer)) - (declare (type (simple-array base-char (32)) table)) - (loop for i from start below end - for bits of-type (unsigned-byte 16) = (aref octets i) - then (ldb (byte 16 0) (logior (ash bits 8) (aref octets i))) - for n-bits of-type fixnum = 8 then (+ n-bits 8) - do (loop while (>= n-bits 5) - do (decf n-bits 5) - (funcall writer (aref table (ldb (byte 5 n-bits) bits)))) - finally (let ((n-pad - (case n-bits - (3 - (funcall writer (aref table - (ash (ldb (byte 3 0) bits) 2))) - 6) - (1 - (funcall writer (aref table - (ash (ldb (byte 1 0) bits) 4))) - 4) - (4 - (funcall writer (aref table - (ash (ldb (byte 4 0) bits) 1))) - 3) - (2 - (funcall writer (aref table - (ash (ldb (byte 2 0) bits) 3))) - 1) - (otherwise 0)))) - (dotimes (i n-pad) - (funcall writer #\=))))) +(defun string->octets/base32 () + ) + +(defun octets->octets/decode/base32 () + ) (defun encoded-length/base32 (count) "Return the number of characters required to encode COUNT octets in Base32." (* (ceiling count 5) 8)) -(defmethod encoding-tools ((format (eql :base32))) - (values #'encode-octets-base32 #'encoded-length/base32 - *base32-encode-table*)) - -(defmethod encoding-tools ((format (eql :base32hex))) - (values #'encode-octets-base32 #'encoded-length/base32 - *base32hex-encode-table*)) - (defvar *base32-decode-table* (make-decode-table *base32-encode-table*)) (defvar *base32hex-decode-table* (make-decode-table *base32hex-encode-table*)) (declaim (type decode-table *base32-decode-table* *base32hex-decode-table*)) @@ -226,3 +190,10 @@ (declare (ignorable case-fold map01)) (values #'decode-octets-base32 #'decoded-length-base32 *base32hex-decode-table*)) + +(register-descriptor-and-constructors :base32 (base32-format-descriptor) + #'make-base32-encode-state + #'make-base32-encode-state) +(register-descriptor-and-constructors :base32hex (base32-format-descriptor) + #'make-base32hex-encode-state + #'make-base32hex-encode-state) diff --git a/base64.lisp b/base64.lisp index c6470f1..71e5671 100644 --- a/base64.lisp +++ b/base64.lisp @@ -157,40 +157,16 @@ (base64-encoder state output input output-start output-end input-start input-end lastp #'identity)) -(defun encode-octets-base64 (octets start end table writer) - (declare (type (simple-array (unsigned-byte 8) (*)) octets)) - (declare (type index start end)) - (declare (type function writer)) - (declare (type (simple-array base-char (64)) table)) - (loop - for i from start below end - for bits of-type (unsigned-byte 16) = (aref octets i) - then (ldb (byte 16 0) (logior (ash bits 8) (aref octets i))) - for n-bits of-type fixnum = 8 then (+ n-bits 8) - do (loop while (>= n-bits 6) - do (decf n-bits 6) - (funcall writer (aref table (ldb (byte 6 n-bits) bits)))) - finally (cond - ((= n-bits 2) - (funcall writer (aref table (ash (ldb (byte 2 0) bits) 4))) - (funcall writer #\=) - (funcall writer #\=)) - ((= n-bits 4) - (funcall writer (aref table (ash (ldb (byte 4 0) bits) 2))) - (funcall writer #\=))))) +(defun string->octets/base64 () + ) + +(defun octets->octets/decode/base64 () + ) (defun encoded-length/base64 (count) "Return the number of characters required to encode COUNT octets in Base64." (* (ceiling count 3) 4)) -(defmethod encoding-tools ((format (eql :base64))) - (values #'encode-octets-base64 #'encoded-length/base64 - *base64-encode-table*)) - -(defmethod encoding-tools ((format (eql :base64url))) - (values #'encode-octets-base64 #'encoded-length/base64 - *base64url-encode-table*)) - (defvar *base64-decode-table* (make-decode-table *base64-encode-table*)) (declaim (type decode-table *base64-decode-table*)) @@ -236,3 +212,10 @@ (declare (ignorable case-fold map01)) (values #'decode-octets-base64 #'decoded-length-base64 *base64url-decode-table*)) + +(register-descriptor-and-constructors :base64 (base64-format-descriptor) + #'make-base64-encode-state + #'make-base64-encode-state) +(register-descriptor-and-constructors :base64url (base64-format-descriptor) + #'make-base64url-encode-state + #'make-base64url-encode-state) diff --git a/base85.lisp b/base85.lisp index 4aeb1a6..45f4fc0 100644 --- a/base85.lisp +++ b/base85.lisp @@ -156,30 +156,11 @@ (base85-encoder state output input output-start output-end input-start input-end lastp #'identity)) -(defun encode-octets-base85 (octets start end table writer) - (declare (type (simple-array (unsigned-byte 8) (*)) octets)) - (declare (type index start end)) - (declare (type (simple-array base-char (85)) table)) - (declare (type function writer)) - (loop with length = (- end start) - with buffer = (make-string 5) - while (plusp length) - do (let ((group (do ((g 0) - (i 24 (- i 8))) - ((or (zerop length) (< i 0)) g) - (setf g (logior (ash (aref octets start) i) g)) - (incf start) - (decf length)))) - (loop for i from 4 downto 0 - do (multiple-value-bind (g b) (truncate group 85) - (setf group g - (aref buffer i) (aref table b))) - finally (dotimes (i 5) - (funcall writer (aref buffer i))))))) - -(defmethod encoding-tools ((format (eql :base85))) - (values #'encode-octets-base85 #'encoded-length/base85 - *base85-encode-table*)) +(defun string->octets/base85 () + ) + +(defun octets->octets/decode/base85 () + ) (defvar *base85-decode-table* (make-decode-table *base85-encode-table*)) (declaim (type decode-table *base85-decode-table*)) @@ -218,3 +199,8 @@ (declare (ignorable case-fold map01)) (values #'decode-octets-base85 #'decoded-length-base85 *base85-decode-table*)) + +(register-descriptor-and-constructors :base85 (base85-format-descriptor) + #'make-base85-encode-state + #'make-base85-encode-state) + diff --git a/octets.lisp b/octets.lisp index fbd7522..efa2ad0 100644 --- a/octets.lisp +++ b/octets.lisp @@ -2,12 +2,6 @@ (cl:in-package :binascii) -(defgeneric encoding-tools (format) - (:documentation "Return three values: the basic encoding function for -FORMAT, an encoded-length function for FORMAT, and an encoding table for -FORMAT. The encoding table specifies ASCII characters for encoded -values and is typically a SIMPLE-BASE-STRING.")) - (defgeneric decoding-tools (format &key case-fold map01) (:documentation "Return three values: the basic decoding function for FORMAT, a decoded-length function for FORMAT, and the decoding table for @@ -43,38 +37,6 @@ FORMAT.")) (when errorp (error "Unsupported element-type ~A" element-type))))) -(defun determine-encoding-writer (destination length element-type) - (etypecase destination - (null - (flet ((do-encode (etype transform) - (let ((v (make-array (the fixnum length) :element-type etype)) - (i -1)) - (values #'(lambda (c) (setf (aref v (incf i)) - (funcall transform c))) - v)))) - (declare (inline do-encode)) - (cond - ((eq element-type 'character) - (do-encode 'character #'identity)) - ((eq element-type 'base-char) - (do-encode 'base-char #'identity)) - (t - (do-encode '(unsigned-byte 8) #'char-code))))) - (stream - (cond - ((or (eq element-type 'character) (eq element-type 'base-char)) - (values #'(lambda (c) (write-char c destination)) nil)) - (t - (values #'(lambda (c) (write-byte (char-code c) destination)) nil)))) - (string - (unless (or (eq element-type 'character) (eq element-type 'base-char)) - (error "Cannot output to a string with ~A :ELEMENT-TYPE" element-type)) - (values #'(lambda (c) (vector-push-extend c destination)) nil)) - ((array (unsigned-byte 8) (*)) - (unless (eq element-type 'octet) - (error "Cannot output to an octet vector with ~A :ELEMENT-TYPE" element-type)) - (values #'(lambda (c) (vector-push-extend (char-code c) destination)) nil)))) - (defun decode-octets* (destination string decode-fun length-fun decode-table start end decoded-length) (declare (type function decode-fun length-fun)) @@ -110,34 +72,6 @@ FORMAT.")) #'(lambda (o) (vector-push-extend o destination))) nil)))) -(defun encode-octets (destination octets format - &key (start 0) end (element-type 'base-char) - &allow-other-keys) - "Encode OCTETS between START and END into ASCII characters -according to FORMAT and written to DESTINATION according to ELEMENT-TYPE. - -If DESTINATION is NIL and ELEMENT-TYPE is a subtype of CHARACTER, then a -string is returned. If DESTINATION is NIL and ELEMENT-TYPE is -\(UNSIGNED-BYTE 8) or an equivalent type, then an octet vector is returned. - -If DESTINATION is a STREAM, then the result is written to DESTINATION -using WRITE-CHAR or WRITE-BYTE as chosen by ELEMENT-TYPE. - -If ELEMENT-TYPE is a subtype of CHARACTER, then DESTINATION may also be -a string with a fill pointer. The result is written to the string as if -by use of VECTOR-PUSH-EXTEND. Similarly, if ELEMENT-TYPE -is (UNSIGNED-BYTE 8) or an equivalent type, then DESTINATION may be an -octet vector with a fill pointer." - (multiple-value-bind (encode-fun length-fun table) (encoding-tools format) - (let* ((end (or end (length octets))) - (length (- end start)) - (canonical-element-type (canonicalize-element-type element-type))) - (multiple-value-bind (writer return-value) - (determine-encoding-writer destination (funcall length-fun length) - canonical-element-type) - (funcall encode-fun octets start end table writer) - return-value)))) - (declaim (inline array-data-and-offsets)) (defun array-data-and-offsets (v start end) "Like ARRAY-DISPLACEMENT, only more useful." @@ -150,7 +84,6 @@ octet vector with a fill pointer." #-(or sbcl cmu) (values v start (or end (length v)))) -#|| (defun encode-to-fresh-vector (octets state start end element-type) (declare (type encode-state state)) (multiple-value-bind (input start end) @@ -160,11 +93,11 @@ octet vector with a fill pointer." (declare (type format-descriptor fd)) (flet ((frob (etype encode-fun) (let ((v (make-array length :element-type etype))) - (funcall encode-fun state v octets + (funcall encode-fun state v input 0 length start end t) v))) (declare (inline frob)) - (ecase (canonical-element-type element-type) + (ecase (canonicalize-element-type element-type) (character (frob 'character (fd-octets->string fd))) (base-char @@ -195,8 +128,8 @@ written are returned as multiple values. ELEMENT-TYPE is ignored. If FINISHP is true, then in addition to any encoding of OCTETS, also output any necessary padding required by FORMAT." - (let ((state (find-encoder format)) - (fd (state-descriptor state))) + (let* ((state (find-encoder format)) + (fd (state-descriptor state))) (declare (type encode-state state)) (declare (type format-descriptor fd)) (flet ((frob (encode-fun) @@ -204,19 +137,20 @@ any necessary padding required by FORMAT." (array-data-and-offsets octets start end) (multiple-value-bind (output output-start output-end) (array-data-and-offsets destination output-start output-end) - (funcall encode-fun format-state - output octets + (funcall encode-fun state + output input output-start output-end - input-start input-end nil))))) + input-start input-end finishp))))) (declare (inline frob)) (etypecase destination (null (encode-to-fresh-vector octets state start end element-type)) (string - (frob (fd-octets->string (state-descriptor state)))) + (frob (fd-octets->string fd))) ((array (unsigned-byte 8) (*)) - (frob (fd-octets->octets/encode (state-descriptor state)))))))) + (frob (fd-octets->octets/encode fd))))))) +#|| (defun decode-to-fresh-vector (string state start end) (declare (type decode-state state)) (multiple-value-bind (input start end) From 37d4295c78c251dd761ad2397bf51e21aae2ae18 Mon Sep 17 00:00:00 2001 From: Nathan Froyd Date: Sat, 30 Jan 2010 14:32:36 -0500 Subject: [PATCH 55/76] fix base85 encoder --- base85.lisp | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/base85.lisp b/base85.lisp index 45f4fc0..ba65858 100644 --- a/base85.lisp +++ b/base85.lisp @@ -65,7 +65,7 @@ (declare (type (integer 0 4) pending)) (declare (type (integer 0 5) output-pending)) (flet ((expand-for-output (bits output-group) - (loop for i from 4 downto 0 + (loop for i from 0 to 4 do (multiple-value-bind (b index) (truncate bits 85) (setf bits b (aref output-group i) (aref table index))) @@ -81,12 +81,10 @@ DO-INPUT (when (< pending 4) (setf bits (ldb (byte 32 0) - (logior (ash (aref input input-index) - (- 24 (* pending 8))) - bits))) + (logior (ash bits 8) (aref input input-index)))) (incf input-index) - (incf pending) - (go INPUT-CHECK)) + (unless (= (incf pending) 4) + (go INPUT-CHECK))) EXPAND-FOR-OUTPUT (expand-for-output bits output-group) OUTPUT-CHECK From 1b77512aaba52b3815cc75fce8ac00763a6ac2ed Mon Sep 17 00:00:00 2001 From: Nathan Froyd Date: Sat, 30 Jan 2010 14:38:54 -0500 Subject: [PATCH 56/76] export new ENCODE function --- package.lisp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/package.lisp b/package.lisp index e3ff008..e275641 100644 --- a/package.lisp +++ b/package.lisp @@ -3,4 +3,7 @@ (cl:defpackage :binascii (:use :cl) (:shadow simple-string) - (:export #:encode-octets #:decode-octets)) + (:export + #:encode-octets #:encode + #:decode-octets ;#:decode + )) From 6e321dda8fed2e3875e81303894cdb4701c76f32 Mon Sep 17 00:00:00 2001 From: Nathan Froyd Date: Sat, 30 Jan 2010 14:39:18 -0500 Subject: [PATCH 57/76] use new ENCODE function in tests --- tests/tests.lisp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/tests.lisp b/tests/tests.lisp index 44f5c21..736eee2 100644 --- a/tests/tests.lisp +++ b/tests/tests.lisp @@ -45,7 +45,7 @@ STRING contains any character whose CHAR-CODE is greater than 255." finally (return t))))) (defun encoding-test* (name input encoded-output decoded-length) - (let ((output (binascii:encode-octets nil input name :end decoded-length)) + (let ((output (binascii:encode input name :end decoded-length)) (decoded-input (binascii:decode-octets nil encoded-output name :decoded-length decoded-length))) (when (mismatch output encoded-output) From 24d9bbe27a3658e324c0981d005bcc34eb88ce1e Mon Sep 17 00:00:00 2001 From: Nathan Froyd Date: Sat, 13 Feb 2010 15:29:20 -0500 Subject: [PATCH 58/76] add base16 decoding infrastructure --- base16.lisp | 114 ++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 107 insertions(+), 7 deletions(-) diff --git a/base16.lisp b/base16.lisp index 6fddae8..adc1986 100644 --- a/base16.lisp +++ b/base16.lisp @@ -117,11 +117,110 @@ (base16-encoder state output input output-start output-end input-start input-end lastp #'identity)) -(defun string->octets/base16 () - ) +(defun base16-decode-table (case-fold) + (if case-fold + (case-fold-decode-table *base16-decode-table* + *base16-encode-table*) + *base16-decode-table*)) -(defun octets->octets/decode/base16 () - ) +(defstruct (base16-decode-state + (:include decode-state) + (:copier nil) + (:constructor %make-base16-decode-state + (table + &aux (descriptor (base16-format-descriptor))))) + (bits 0 :type (unsigned-byte 8)) + (n-bits 0 :type fixnum) + (table *base16-decode-table* :read-only t :type decode-table)) + +(defun make-base16-decode-state (case-fold map01) + (declare (ignore map01)) + (%make-base16-decode-state (base16-decode-table case-fold))) + +(defun make-hex-decode-state (case-fold map01) + (declare (ignore case-fold map01)) + (%make-base16-decode-state (base16-decode-table t))) + +(defun base16-decoder (state output input + output-index output-end + input-index input-end lastp converter) + (declare (type base16-decode-state state)) + (declare (type simple-octet-vector output)) + (declare (type index output-index output-end input-index input-end)) + (declare (type function converter)) + (let ((bits (base16-decode-state-bits state)) + (n-bits (base16-decode-state-n-bits state)) + (table (base16-decode-state-table state))) + (declare (type (unsigned-byte 8) bits)) + (tagbody + START + (when (base16-decode-state-finished-input-p state) + (go FLUSH-BITS)) + OUTPUT-AVAILABLE-CHECK + (when (< n-bits 8) + (go INPUT-AVAILABLE-CHECK)) + OUTPUT-SPACE-CHECK + (when (>= output-index output-end) + (go DONE)) + DO-OUTPUT + (setf (aref output output-index) bits + bits 0 + n-bits 0) + (incf output-index) + (go INPUT-AVAILABLE-CHECK) + INPUT-AVAILABLE-CHECK + (when (>= input-index input-end) + (go DONE)) + DO-INPUT + (assert (< n-bits 8)) + (let* ((v (aref input input-index)) + (c (dtref table (funcall converter v)))) + (when (= c +dt-invalid+) + (error "invalid hex digit ~A at position ~D" v input-index)) + (incf input-index) + (cond + ((= n-bits 0) + (setf bits (* (logand c #xf) 16) + n-bits 4) + (go INPUT-AVAILABLE-CHECK)) + ((= n-bits 4) + (setf bits (+ bits (logand c #xf)) + n-bits 8) + (go OUTPUT-SPACE-CHECk)))) + DONE + (unless lastp + (go RESTORE-STATE)) + (setf (base16-decode-state-finished-input-p state) t) + FLUSH-BITS + (when (zerop n-bits) + (go RESTORE-STATE)) + FLUSH-OUTPUT-CHECK + (when (>= output-index output-end) + (go RESTORE-STATE)) + DO-FLUSH-OUTPUT + (when (= n-bits 4) + (error "attempting to decode an odd number of hex digits")) + (setf (aref output output-index) bits + bits 0 + n-bits 0) + RESTORE-STATE + (setf (base16-decode-state-n-bits state) n-bits + (base16-decode-state-bits state) bits)) + (values input-index output-index))) + +(defun string->octets/base16 (state output input + output-index output-end + input-index input-end lastp) + (declare (type simple-string input)) + (base16-decoder state output input output-index output-end + input-index input-end lastp #'char-code)) + +(defun octets->octets/decode/base16 (state output input + output-index output-end + input-index input-end lastp) + (declare (type simple-octet-vector input)) + (base16-decoder state output input output-index output-end + input-index input-end lastp #'identity)) (defun decode-octets-base16 (string start end length table writer) (declare (type index start end)) @@ -138,7 +237,8 @@ (error "Invalid hex digit ~A" char1)) (when (= v2 +dt-invalid+) (error "Invalid hex digit ~A" char2)) - (funcall writer (+ (* v1 16) v2)))))) + (funcall writer (+ (* (logand v1 #xf) 16) + (logand v2 #xf))))))) (declare (inline do-decode)) (decode-dispatch string #'do-decode))) @@ -165,7 +265,7 @@ (register-descriptor-and-constructors :base16 (base16-format-descriptor) #'make-base16-encode-state - #'make-base16-encode-state) + #'make-base16-decode-state) (register-descriptor-and-constructors :hex (base16-format-descriptor) #'make-hex-encode-state - #'make-hex-encode-state) + #'make-hex-decode-state) From 85235ce740e9cef2fe5b3b59ddeb7c2cdf61fd57 Mon Sep 17 00:00:00 2001 From: Nathan Froyd Date: Sat, 13 Feb 2010 15:32:15 -0500 Subject: [PATCH 59/76] add base32 decoding infrastructure --- base32.lisp | 150 ++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 140 insertions(+), 10 deletions(-) diff --git a/base32.lisp b/base32.lisp index fd74ee5..86ffc82 100644 --- a/base32.lisp +++ b/base32.lisp @@ -129,20 +129,150 @@ (base32-encoder state output input output-start output-end input-start input-end lastp #'identity)) -(defun string->octets/base32 () - ) +(defvar *base32-decode-table* (make-decode-table *base32-encode-table*)) +(defvar *base32hex-decode-table* (make-decode-table *base32hex-encode-table*)) +(declaim (type decode-table *base32-decode-table* *base32hex-decode-table*)) + +(defun base32-decode-table (case-fold map01) + (let ((table *base32-decode-table*)) + (when map01 + (setf table (copy-seq table)) + (setf (aref table (char-code #\0)) (aref table (char-code #\O))) + (case map01 + ((#\I #\L) (setf (aref table (char-code #\1)) + (aref table (char-code map01)))))) + (when case-fold + (setf table (case-fold-decode-table table *base32-encode-table*))) + table)) + +(defstruct (base32-decode-state + (:include decode-state) + (:copier nil) + (:constructor %make-base32-decode-state + (table + &aux (descriptor (base32-format-descriptor))))) + (bits 0 :type (unsigned-byte 16)) + (n-bits 0 :type (unsigned-byte 8)) + (padding-remaining 0 :type (integer 0 6)) + (table *base32-decode-table* :read-only t :type decode-table)) + +(defun make-base32-decode-state (case-fold map01) + (%make-base32-decode-state (base32-decode-table case-fold map01))) -(defun octets->octets/decode/base32 () - ) +(defun make-base32hex-decode-state (case-fold map01) + (declare (ignore case-fold map01)) + (%make-base32-decode-state *base32hex-decode-table*)) + +(defun base32-decoder (state output input + output-index output-end + input-index input-end lastp converter) + (declare (type base32-decode-state state)) + (declare (type simple-octet-vector output)) + (declare (type index output-index output-end input-index input-end)) + (declare (type function converter)) + (let ((bits (base32-decode-state-bits state)) + (n-bits (base32-decode-state-n-bits state)) + (padding-remaining (base32-decode-state-padding-remaining state)) + (table (base32-decode-state-table state))) + (declare (type (unsigned-byte 16) bits)) + (declare (type fixnum n-bits)) + (declare (type (integer 0 6) padding-remaining)) + (tagbody + PAD-CHECK + (when (base32-decode-state-finished-input-p state) + (go EAT-EQUAL-CHECK-PAD)) + OUTPUT-AVAILABLE-CHECK + (when (< n-bits 8) + (go INPUT-AVAILABLE-CHECK)) + OUTPUT-SPACE-CHECK + (when (>= output-index output-end) + (go DONE)) + DO-OUTPUT + (decf n-bits 8) + (setf (aref output output-index) (logand (ash bits (- n-bits)) #xff) + bits (logand bits #xff)) + (incf output-index) + (go INPUT-AVAILABLE-CHECK) + INPUT-AVAILABLE-CHECK + (when (>= input-index input-end) + (go DONE)) + DO-INPUT + (let* ((c (aref input input-index)) + (v (funcall converter c)) + (d (dtref table v))) + (when (= v (funcall converter #\=)) + (go SAW-EQUAL)) + (when (= d +dt-invalid+) + (error "invalid base32 character ~A at position ~D" c input-index)) + (incf input-index) + (setf bits (ldb (byte 16 0) (logior (ash bits 5) d))) + (incf n-bits 5) + (go OUTPUT-AVAILABLE-CHECK)) + DONE + (unless lastp + (go RESTORE-STATE)) + SAW-EQUAL + (setf (base32-decode-state-finished-input-p state) t) + ;; A complete base32 group is: + ;; + ;; vvvvvvvv wwwwwwww xxxxxxxx yyyyyyyy zzzzzzzz + ;; + ;; which gets encoded by: + ;; + ;; vvvvv vvvww wwwww wxxxx xxxxy yyyyy yyzzz zzzzz + ;; + ;; so the intermediate bits left are: 3 1 4 2 0 + ;; corresponding to padding amounts : 6 4 3 1 0 (in characters) + ;; + ;; but we also have to handle cases where we start padding too + ;; soon: we can't handle padding after seeing 1 group of 5, 3 + ;; groups of 5, 4 groups of 5 or 6 groups of five. those + ;; correspond to 5 bits remaining (having not seen the 3 v's), 7 + ;; bits remaining (having not seen the 1 w), 4 bits remaining + ;; (having not seen the 4 x's), and 6 bits remaining (having not + ;; seen the 2 y's). + (let ((n-pad-chars #.(make-array 5 :initial-contents '(0 4 1 6 3) + :element-type 'fixnum))) + (if (<= n-bits 4) + (setf padding-remaining (aref n-pad-chars n-bits)) + (error "invalid base32 input"))) + EAT-EQUAL-CHECK-PAD + (when (zerop padding-remaining) + (go RESTORE-STATE)) + EAT-EQUAL-CHECK-INPUT + (when (>= input-index input-end) + (go RESTORE-STATE)) + EAT-EQUAL + (let ((v (aref input input-index))) + (unless (= (funcall converter v) (funcall converter #\=)) + (error "invalid base32 input ~A at position ~D" v input-index)) + (incf input-index) + (decf padding-remaining) + (go EAT-EQUAL-CHECK-PAD)) + RESTORE-STATE + (setf (base32-decode-state-n-bits state) n-bits + (base32-decode-state-bits state) bits + (base32-decode-state-padding-remaining state) padding-remaining)) + (values input-index output-index))) + +(defun string->octets/base32 (state output input + output-index output-end + input-index input-end lastp) + (declare (type simple-string input)) + (base32-decoder state output input output-index output-end + input-index input-end lastp #'char-code)) + +(defun octets->octets/decode/base32 (state output input + output-index output-end + input-index input-end lastp) + (declare (type simple-octet-vector input)) + (base32-decoder state output input output-index output-end + input-index input-end lastp #'identity)) (defun encoded-length/base32 (count) "Return the number of characters required to encode COUNT octets in Base32." (* (ceiling count 5) 8)) -(defvar *base32-decode-table* (make-decode-table *base32-encode-table*)) -(defvar *base32hex-decode-table* (make-decode-table *base32hex-encode-table*)) -(declaim (type decode-table *base32-decode-table* *base32hex-decode-table*)) - (defun decode-octets-base32 (string start end length table writer) (declare (type index start end)) (declare (type decode-table table)) @@ -193,7 +323,7 @@ (register-descriptor-and-constructors :base32 (base32-format-descriptor) #'make-base32-encode-state - #'make-base32-encode-state) + #'make-base32-decode-state) (register-descriptor-and-constructors :base32hex (base32-format-descriptor) #'make-base32hex-encode-state - #'make-base32hex-encode-state) + #'make-base32hex-decode-state) From 62c6ec92592e2f6ade652fb609b4fbf338fad1b2 Mon Sep 17 00:00:00 2001 From: Nathan Froyd Date: Sat, 13 Feb 2010 15:29:38 -0500 Subject: [PATCH 60/76] add base64 decoding infrastructure --- base64.lisp | 119 ++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 111 insertions(+), 8 deletions(-) diff --git a/base64.lisp b/base64.lisp index 71e5671..7be5d6b 100644 --- a/base64.lisp +++ b/base64.lisp @@ -157,12 +157,6 @@ (base64-encoder state output input output-start output-end input-start input-end lastp #'identity)) -(defun string->octets/base64 () - ) - -(defun octets->octets/decode/base64 () - ) - (defun encoded-length/base64 (count) "Return the number of characters required to encode COUNT octets in Base64." (* (ceiling count 3) 4)) @@ -175,6 +169,115 @@ (make-decode-table *base64url-encode-table*)) (declaim (type decode-table *base64url-decode-table*)) +(defstruct (base64-decode-state + (:include decode-state) + (:copier nil) + (:constructor %make-base64-decode-state + (table + &aux (descriptor (base64-format-descriptor))))) + (bits 0 :type (unsigned-byte 16)) + (n-bits 0 :type (unsigned-byte 8)) + (padding-remaining 0 :type (integer 0 3)) + (table *base64-decode-table* :read-only t :type decode-table)) + +(defun make-base64-decode-state (case-fold map01) + (declare (ignore case-fold map01)) + (%make-base64-decode-state *base64-decode-table*)) + +(defun make-base64url-decode-state (case-fold map01) + (declare (ignore case-fold map01)) + (%make-base64-decode-state *base64url-decode-table*)) + +(defun base64-decoder (state output input + output-index output-end + input-index input-end lastp converter) + (declare (type base64-decode-state state)) + (declare (type simple-octet-vector output)) + (declare (type index output-index output-end input-index input-end)) + (declare (type function converter)) + (let ((bits (base64-decode-state-bits state)) + (n-bits (base64-decode-state-n-bits state)) + (padding-remaining (base64-decode-state-padding-remaining state)) + (table (base64-decode-state-table state))) + (declare (type (unsigned-byte 16) bits)) + (declare (type fixnum n-bits)) + (declare (type (integer 0 6) padding-remaining)) + (tagbody + PAD-CHECK + (when (base64-decode-state-finished-input-p state) + (go EAT-EQUAL-CHECK-PAD)) + OUTPUT-AVAILABLE-CHECK + (when (< n-bits 8) + (go INPUT-AVAILABLE-CHECK)) + OUTPUT-SPACE-CHECK + (when (>= output-index output-end) + (go DONE)) + DO-OUTPUT + (decf n-bits 8) + (setf (aref output output-index) (logand (ash bits (- n-bits)) #xff) + bits (logand bits #xff)) + (incf output-index) + (go INPUT-AVAILABLE-CHECK) + INPUT-AVAILABLE-CHECK + (when (>= input-index input-end) + (go DONE)) + DO-INPUT + (let* ((c (aref input input-index)) + (v (funcall converter c)) + (d (dtref table v))) + (when (= v (funcall converter #\=)) + (go SAW-EQUAL)) + (when (= d +dt-invalid+) + (error "invalid base64 character ~A at position ~D" c input-index)) + (incf input-index) + (setf bits (ldb (byte 16 0) (logior (ash bits 6) d))) + (incf n-bits 6) + (go OUTPUT-AVAILABLE-CHECK)) + DONE + (unless lastp + (go RESTORE-STATE)) + SAW-EQUAL + (setf (base64-decode-state-finished-input-p state) t) + (cond + ((zerop n-bits) + (go RESTORE-STATE)) + ((= n-bits 2) + (setf padding-remaining 3)) + ((= n-bits 4) + (setf padding-remaining 2))) + EAT-EQUAL-CHECK-PAD + (when (zerop padding-remaining) + (go RESTORE-STATE)) + EAT-EQUAL-CHECK-INPUT + (when (>= input-index input-end) + (go RESTORE-STATE)) + EAT-EQUAL + (let ((v (aref input input-index))) + (unless (= (funcall converter v) (funcall converter #\=)) + (error "invalid base64 input ~A at position ~D" v input-index)) + (incf input-index) + (decf padding-remaining) + (go EAT-EQUAL-CHECK-PAD)) + RESTORE-STATE + (setf (base64-decode-state-n-bits state) n-bits + (base64-decode-state-bits state) bits + (base64-decode-state-padding-remaining state) padding-remaining)) + (values input-index output-index))) + +(defun string->octets/base64 (state output input + output-index output-end + input-index input-end lastp) + (declare (type simple-string input)) + (base64-decoder state output input output-index output-end + input-index input-end lastp #'char-code)) + +(defun octets->octets/decode/base64 (state output input + output-index output-end + input-index input-end lastp) + (declare (type simple-octet-vector input)) + (base64-decoder state output input output-index output-end + input-index input-end lastp #'identity)) + (defun decode-octets-base64 (string start end length table writer) (declare (type index start end)) (declare (type function writer)) @@ -215,7 +318,7 @@ (register-descriptor-and-constructors :base64 (base64-format-descriptor) #'make-base64-encode-state - #'make-base64-encode-state) + #'make-base64-decode-state) (register-descriptor-and-constructors :base64url (base64-format-descriptor) #'make-base64url-encode-state - #'make-base64url-encode-state) + #'make-base64url-decode-state) From fc15b65ecd39cc2ef92c19f837df1a42555adb24 Mon Sep 17 00:00:00 2001 From: Nathan Froyd Date: Mon, 8 Feb 2010 22:41:29 -0500 Subject: [PATCH 61/76] fix BASE32-ENCODER indentation --- base32.lisp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/base32.lisp b/base32.lisp index 86ffc82..3bb15e8 100644 --- a/base32.lisp +++ b/base32.lisp @@ -56,10 +56,10 @@ (declare (type fixnum n-bits)) (declare (type (simple-array fixnum (5)) n-pad-chars)) (tagbody - PAD-CHECK + PAD-CHECK (when (base32-encode-state-finished-input-p state) (go PAD)) - INPUT-CHECK + INPUT-CHECK (when (>= input-index input-end) (go DONE)) DO-INPUT From 9a6728792825f413cdb9b162c5d89208d9ff339d Mon Sep 17 00:00:00 2001 From: Nathan Froyd Date: Sat, 13 Feb 2010 15:29:44 -0500 Subject: [PATCH 62/76] add base85 decoding infrastructure --- base85.lisp | 126 +++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 119 insertions(+), 7 deletions(-) diff --git a/base85.lisp b/base85.lisp index ba65858..05b1054 100644 --- a/base85.lisp +++ b/base85.lisp @@ -154,15 +154,127 @@ (base85-encoder state output input output-start output-end input-start input-end lastp #'identity)) -(defun string->octets/base85 () - ) - -(defun octets->octets/decode/base85 () - ) - (defvar *base85-decode-table* (make-decode-table *base85-encode-table*)) (declaim (type decode-table *base85-decode-table*)) +(defstruct (base85-decode-state + (:include decode-state) + (:copier nil) + (:constructor %make-base85-decode-state + (&aux (descriptor (base85-format-descriptor))))) + (bits 0 :type (unsigned-byte 32)) + (pending 0 :type (integer 0 5)) + (output-pending 0 :type (integer 0 4)) + (table *base85-decode-table* :read-only t :type decode-table)) + +(defun make-base85-decode-state (case-fold map01) + (declare (ignore case-fold map01)) + (%make-base85-decode-state)) + +(defun base85-decoder (state output input + output-index output-end + input-index input-end lastp converter) + (declare (type base85-decode-state state)) + (declare (type simple-octet-vector output)) + (declare (type index output-index output-end input-index input-end)) + (declare (type function converter)) + (let ((bits (base85-decode-state-bits state)) + (pending (base85-decode-state-pending state)) + (output-pending (base85-decode-state-output-pending state)) + (table (base85-decode-state-table state))) + (declare (type (unsigned-byte 32) bits)) + (declare (type (integer 0 5) pending)) + (declare (type (integer 0 4) output-pending)) + (tagbody + FINISHED-CHECK + (when (base85-decode-state-finished-input-p state) + (go FLUSH-BITS)) + OUTPUT-AVAILABLE-CHECK + (when (zerop output-pending) + (go INPUT-AVAILABLE-CHECK)) + OUTPUT-SPACE-CHECK + (when (>= output-index output-end) + (go DONE)) + DO-OUTPUT + (setf (aref output output-index) + (ldb (byte 8 (* (decf output-pending) 8)) bits)) + (incf output-index) + (cond + ((zerop output-pending) + (setf bits 0) + (setf pending 0) + (setf output-pending 0) + (go INPUT-AVAILABLE-CHECK)) + (t + (go OUTPUT-SPACE-CHECK))) + INPUT-AVAILABLE-CHECK + (when (>= input-index input-end) + (go DONE)) + DO-INPUT + (cond + ((< pending 5) + (let* ((c (aref input input-index)) + (v (funcall converter c)) + (d (dtref table v))) + (when (= d +dt-invalid+) + (error "invalid base85 character ~A at position ~D" c input-index)) + ;; FIXME: check for overflow. + (setf bits (+ (* bits 85) d)) + (incf pending) + (incf input-index) + (go INPUT-AVAILABLE-CHECK))) + (t + (setf output-pending 4) + (go OUTPUT-SPACE-CHECK))) + DONE + (unless lastp + (go RESTORE-STATE)) + (setf (base85-decode-state-finished-input-p state) t) + ;; We should *always* have a complete group or nothing at this + ;; point. + EOT-VALIDITY-CHECK + (when (<= 1 pending 4) + (error "invalid base85 input")) + (setf output-pending (if (zerop pending) 0 4)) + FLUSH-BITS + (when (zerop output-pending) + (go RESTORE-STATE)) + FLUSH-OUTPUT-CHECK + (when (>= output-index output-end) + (go RESTORE-STATE)) + DO-FLUSH-OUTPUT + (when (> output-pending 0) + (setf (aref output output-index) + (ldb (byte 8 (* (decf output-pending) 8)) bits)) + (incf output-index) + (cond + ((zerop output-pending) + (setf bits 0) + (setf pending 0) + (setf output-pending 0) + (go RESTORE-STATE)) + (t + (go FLUSH-OUTPUT-CHECK)))) + RESTORE-STATE + (setf (base85-decode-state-bits state) bits + (base85-decode-state-pending state) pending + (base85-decode-state-output-pending state) output-pending)) + (values input-index output-index))) + +(defun string->octets/base85 (state output input + output-index output-end + input-index input-end lastp) + (declare (type simple-string input)) + (base85-decoder state output input output-index output-end + input-index input-end lastp #'char-code)) + +(defun octets->octets/decode/base85 (state output input + output-index output-end + input-index input-end lastp) + (declare (type simple-octet-vector input)) + (base85-decoder state output input output-index output-end + input-index input-end lastp #'identity)) + (defun decoded-length-base85 (length) (multiple-value-bind (n-groups rem) (truncate length 5) (unless (zerop rem) @@ -200,5 +312,5 @@ (register-descriptor-and-constructors :base85 (base85-format-descriptor) #'make-base85-encode-state - #'make-base85-encode-state) + #'make-base85-decode-state) From 3be47ec23734f8a4cb968d2bbf46dd34aee77cdd Mon Sep 17 00:00:00 2001 From: Nathan Froyd Date: Sat, 13 Feb 2010 15:29:55 -0500 Subject: [PATCH 63/76] add ascii85 decoding infrastructure --- ascii85.lisp | 134 ++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 127 insertions(+), 7 deletions(-) diff --git a/ascii85.lisp b/ascii85.lisp index 4966cea..219fedf 100644 --- a/ascii85.lisp +++ b/ascii85.lisp @@ -150,15 +150,135 @@ (ascii85-encoder state output input output-start output-end input-start input-end lastp #'identity)) -(defun string->octets/ascii85 () - ) - -(defun octets->octets/decode/ascii85 () - ) - (defvar *ascii85-decode-table* (make-decode-table *ascii85-encode-table*)) (declaim (type decode-table *ascii85-decode-table*)) +(defstruct (ascii85-decode-state + (:include decode-state) + (:copier nil) + (:constructor %make-ascii85-decode-state + (&aux (descriptor (ascii85-format-descriptor))))) + (bits 0 :type (unsigned-byte 32)) + (pending 0 :type (integer 0 5)) + (output-pending 0 :type (integer 0 4)) + (table *ascii85-decode-table* :read-only t :type decode-table)) + +(defun make-ascii85-decode-state (case-fold map01) + (declare (ignore case-fold map01)) + (%make-ascii85-decode-state)) + +(defun ascii85-decoder (state output input + output-index output-end + input-index input-end lastp converter) + (declare (type ascii85-decode-state state)) + (declare (type simple-octet-vector output)) + (declare (type index output-index output-end input-index input-end)) + (declare (type function converter)) + (let ((bits (ascii85-decode-state-bits state)) + (pending (ascii85-decode-state-pending state)) + (output-pending (ascii85-decode-state-output-pending state)) + (table (ascii85-decode-state-table state))) + (declare (type (unsigned-byte 32) bits)) + (declare (type (integer 0 5) pending)) + (declare (type (integer 0 4) output-pending)) + (tagbody + FINISHED-CHECK + (when (ascii85-decode-state-finished-input-p state) + (go FLUSH-BITS)) + OUTPUT-AVAILABLE-CHECK + (when (zerop output-pending) + (go INPUT-AVAILABLE-CHECK)) + OUTPUT-SPACE-CHECK + (when (>= output-index output-end) + (go DONE)) + DO-OUTPUT + (setf (aref output output-index) + (ldb (byte 8 (* (decf output-pending) 8)) bits)) + (incf output-index) + (cond + ((zerop output-pending) + (setf bits 0) + (setf pending 0) + (setf output-pending 0) + (go INPUT-AVAILABLE-CHECK)) + (t + (go OUTPUT-SPACE-CHECK))) + INPUT-AVAILABLE-CHECK + (when (>= input-index input-end) + (go DONE)) + DO-INPUT + (cond + ((< pending 5) + (let* ((c (aref input input-index)) + (v (funcall converter c)) + (d (dtref table v))) + (cond + ((eql c (funcall converter #\z)) + (unless (zerop pending) + (error "z found in the middle of an ascii85 group")) + (incf input-index) + (setf output-pending 4) + (go OUTPUT-SPACE-CHECK)) + ((= d +dt-invalid+) + (error "invalid ascii85 character ~A at position ~D" c input-index)) + (t + ;; FIXME: check for overflow. + (setf bits (+ (* bits 85) d)) + (incf pending) + (incf input-index) + (go INPUT-AVAILABLE-CHECK))))) + (t + (setf output-pending 4) + (go OUTPUT-SPACE-CHECK))) + DONE + (unless lastp + (go RESTORE-STATE)) + (setf (ascii85-decode-state-finished-input-p state) t) + ;; We should *always* have a complete group or nothing at this + ;; point. + EOT-VALIDITY-CHECK + (when (<= 1 pending 4) + (error "invalid ascii85 input")) + (setf output-pending (if (zerop pending) 0 4)) + FLUSH-BITS + (when (zerop output-pending) + (go RESTORE-STATE)) + FLUSH-OUTPUT-CHECK + (when (>= output-index output-end) + (go RESTORE-STATE)) + DO-FLUSH-OUTPUT + (when (> output-pending 0) + (setf (aref output output-index) + (ldb (byte 8 (* (decf output-pending) 8)) bits)) + (incf output-index) + (cond + ((zerop output-pending) + (setf bits 0) + (setf pending 0) + (setf output-pending 0) + (go RESTORE-STATE)) + (t + (go FLUSH-OUTPUT-CHECK)))) + RESTORE-STATE + (setf (ascii85-decode-state-bits state) bits + (ascii85-decode-state-pending state) pending + (ascii85-decode-state-output-pending state) output-pending)) + (values input-index output-index))) + +(defun string->octets/ascii85 (state output input + output-index output-end + input-index input-end lastp) + (declare (type simple-string input)) + (ascii85-decoder state output input output-index output-end + input-index input-end lastp #'char-code)) + +(defun octets->octets/decode/ascii85 (state output input + output-index output-end + input-index input-end lastp) + (declare (type simple-octet-vector input)) + (ascii85-decoder state output input output-index output-end + input-index input-end lastp #'identity)) + (defun decoded-length-ascii85 (length) ;; FIXME: There's nothing smart we can do without scanning the string. ;; We have to assume the worst case, that all the characters in the @@ -213,4 +333,4 @@ (register-descriptor-and-constructors :ascii85 (ascii85-format-descriptor) #'make-ascii85-encode-state - #'make-ascii85-encode-state) + #'make-ascii85-decode-state) From e87c3cc562b9811435f9b48a2d144f633d878f8f Mon Sep 17 00:00:00 2001 From: Nathan Froyd Date: Sat, 13 Feb 2010 15:32:42 -0500 Subject: [PATCH 64/76] activate new decoding functionality --- ascii85.lisp | 46 ----------------- base16.lisp | 36 ------------- base32.lisp | 45 ----------------- base64.lisp | 35 ------------- base85.lisp | 29 ----------- octets.lisp | 128 +++++++++-------------------------------------- package.lisp | 2 +- tests/tests.lisp | 6 +-- 8 files changed, 27 insertions(+), 300 deletions(-) diff --git a/ascii85.lisp b/ascii85.lisp index 219fedf..4ab2145 100644 --- a/ascii85.lisp +++ b/ascii85.lisp @@ -285,52 +285,6 @@ ;; string are #\z. (* length 5)) -(defun decode-octets-ascii85 (string start end length table writer) - (declare (type index start end)) - (declare (type function writer)) - (declare (type decode-table table)) - (flet ((do-decode (transform) - (do ((i 0) - (acc 0)) - ((>= start end) - (unless (zerop i) - (when (= i 1) - (error "corrupt ascii85 group")) - (dotimes (j (- 5 i)) - (setf acc (+ (* acc 85) 84))) - (dotimes (j (1- i)) - (funcall writer (ldb (byte 8 (* (- 3 j) 8)) acc))))) - (cond - ((>= i 5) - (unless (< acc (ash 1 32)) - (error "invalid ascii85 sequence")) - (dotimes (i 4) - (funcall writer (ldb (byte 8 (* (- 3 i) 8)) acc))) - (setf i 0 - acc 0)) - (t - (let* ((b (funcall transform (aref string start))) - (d (dtref table b))) - (incf start) - (cond - ((= b #.(char-code #\z)) - (unless (zerop i) - (error "z found in the middle of an ascii85 group")) - (dotimes (i 4) - (funcall writer 0))) - ((= d +dt-invalid+) - (error "invalid ascii85 character ~X" b)) - (t - (incf i) - (setf acc (+ (* acc 85) d)))))))))) - (declare (inline do-decode)) - (decode-dispatch string #'do-decode))) - -(defmethod decoding-tools ((format (eql :ascii85)) &key case-fold map01) - (declare (ignorable case-fold map01)) - (values #'decode-octets-ascii85 #'decoded-length-ascii85 - *ascii85-decode-table*)) - (register-descriptor-and-constructors :ascii85 (ascii85-format-descriptor) #'make-ascii85-encode-state #'make-ascii85-decode-state) diff --git a/base16.lisp b/base16.lisp index adc1986..22646ea 100644 --- a/base16.lisp +++ b/base16.lisp @@ -222,47 +222,11 @@ (base16-decoder state output input output-index output-end input-index input-end lastp #'identity)) -(defun decode-octets-base16 (string start end length table writer) - (declare (type index start end)) - (declare (type function writer)) - (declare (type decode-table table)) - (declare (optimize (speed 3))) - (flet ((do-decode (transform) - (loop for i from start below end by 2 - do (let* ((char1 (aref string i)) - (char2 (aref string (1+ i))) - (v1 (dtref table (funcall transform char1))) - (v2 (dtref table (funcall transform char2)))) - (when (= v1 +dt-invalid+) - (error "Invalid hex digit ~A" char1)) - (when (= v2 +dt-invalid+) - (error "Invalid hex digit ~A" char2)) - (funcall writer (+ (* (logand v1 #xf) 16) - (logand v2 #xf))))))) - (declare (inline do-decode)) - (decode-dispatch string #'do-decode))) - (defun decoded-length-base16 (length) (unless (evenp length) (error "cannot decode an odd number of base16 characters")) (truncate length 2)) -(defmethod decoding-tools ((format (eql :base16)) &key case-fold map01) - (declare (ignorable case-fold map01)) - (values #'decode-octets-base16 - #'decoded-length-base16 - (if case-fold - (case-fold-decode-table *base16-decode-table* - *base16-encode-table*) - *base16-decode-table*))) - -(defmethod decoding-tools ((format (eql :hex)) &key case-fold map01) - (declare (ignorable case-fold map01)) - (values #'decode-octets-base16 - #'decoded-length-base16 - (case-fold-decode-table *base16-decode-table* - *base16-encode-table*))) - (register-descriptor-and-constructors :base16 (base16-format-descriptor) #'make-base16-encode-state #'make-base16-decode-state) diff --git a/base32.lisp b/base32.lisp index 3bb15e8..6f448f9 100644 --- a/base32.lisp +++ b/base32.lisp @@ -273,54 +273,9 @@ "Return the number of characters required to encode COUNT octets in Base32." (* (ceiling count 5) 8)) -(defun decode-octets-base32 (string start end length table writer) - (declare (type index start end)) - (declare (type decode-table table)) - (declare (type function writer)) - (flet ((do-decode (transform) - (loop with bits of-type (unsigned-byte 16) = 0 - with n-bits of-type (unsigned-byte 8) = 0 - for i from start below end - for char = (aref string i) - for value = (dtref table (funcall transform char)) - do (cond - ((>= value 0) - (setf bits (logand (logior (ash bits 5) value) #xffff)) - (incf n-bits 5) - (when (>= n-bits 8) - (decf n-bits 8) - (funcall writer (logand (ash bits (- n-bits)) #xff)) - (setf bits (logand bits #xff)))) - ((eql (funcall transform char) - (funcall transform #\=))) - (t - (error "bad character ~A in base32 decoding" char)))))) - (declare (inline do-decode)) - (decode-dispatch string #'do-decode))) - (defun decoded-length-base32 (length) (* (ceiling length 8) 5)) -(defmethod decoding-tools ((format (eql :base32)) &key case-fold map01) - (declare (ignorable case-fold map01)) - (values #'decode-octets-base32 - #'decoded-length-base32 - (let ((table *base32-decode-table*)) - (when map01 - (setf table (copy-seq table)) - (setf (aref table (char-code #\0)) (aref table (char-code #\O))) - (case map01 - ((#\I #\L) (setf (aref table (char-code #\1)) - (aref table (char-code map01)))))) - (when case-fold - (setf table (case-fold-decode-table table *base32-encode-table*))) - table))) - -(defmethod decoding-tools ((format (eql :base32hex)) &key case-fold map01) - (declare (ignorable case-fold map01)) - (values #'decode-octets-base32 #'decoded-length-base32 - *base32hex-decode-table*)) - (register-descriptor-and-constructors :base32 (base32-format-descriptor) #'make-base32-encode-state #'make-base32-decode-state) diff --git a/base64.lisp b/base64.lisp index 7be5d6b..845d88a 100644 --- a/base64.lisp +++ b/base64.lisp @@ -278,44 +278,9 @@ (base64-decoder state output input output-index output-end input-index input-end lastp #'identity)) -(defun decode-octets-base64 (string start end length table writer) - (declare (type index start end)) - (declare (type function writer)) - (declare (type decode-table table)) - (flet ((do-decode (transform) - (loop with bits of-type (unsigned-byte 16) = 0 - with n-bits of-type (unsigned-byte 8) = 0 - for i from start below end - for char = (aref string i) - for value = (dtref table (funcall transform char)) - do (cond - ((>= value 0) - (setf bits (logand (logior (ash bits 6) value) #xffff)) - (incf n-bits 6) - (when (>= n-bits 8) - (decf n-bits 8) - (funcall writer (logand (ash bits (- n-bits)) #xff)) - (setf bits (logand bits #xff)))) - ((eql (funcall transform char) - (funcall transform #\=))) - ((= value +dt-invalid+) - (error "bad character ~A in base64 decoding" char)))))) - (declare (inline do-decode)) - (decode-dispatch string #'do-decode))) - (defun decoded-length-base64 (length) (* (ceiling length 4) 3)) -(defmethod decoding-tools ((format (eql :base64)) &key case-fold map01) - (declare (ignorable case-fold map01)) - (values #'decode-octets-base64 #'decoded-length-base64 - *base64-decode-table*)) - -(defmethod decoding-tools ((format (eql :base64url)) &key case-fold map01) - (declare (ignorable case-fold map01)) - (values #'decode-octets-base64 #'decoded-length-base64 - *base64url-decode-table*)) - (register-descriptor-and-constructors :base64 (base64-format-descriptor) #'make-base64-encode-state #'make-base64-decode-state) diff --git a/base85.lisp b/base85.lisp index 05b1054..a72189b 100644 --- a/base85.lisp +++ b/base85.lisp @@ -281,35 +281,6 @@ (error "base85 input length ~D must be a multiple of 5" length)) (* n-groups 4))) -(defun decode-octets-base85 (string start end length table writer) - (declare (type index start end)) - (declare (type function writer)) - (declare (type decode-table table)) - (flet ((do-decode (transform) - (loop while (< start end) - do (do ((i 0 (1+ i)) - (acc 0)) - ((>= i 5) - (incf start 5) - (unless (< acc (ash 1 32)) - (error "invalid base85 sequence")) - (let ((count (min length 4))) - (dotimes (i count) - (funcall writer (ldb (byte 8 (* (- 3 i) 8)) acc))) - (decf length count))) - (let* ((b (funcall transform (aref string (+ start i)))) - (d (dtref table b))) - (when (= d +dt-invalid+) - (error "invalid base85 character: ~X" b)) - (setf acc (+ (* acc 85) d))))))) - (declare (inline do-decode)) - (decode-dispatch string #'do-decode))) - -(defmethod decoding-tools ((format (eql :base85)) &key case-fold map01) - (declare (ignorable case-fold map01)) - (values #'decode-octets-base85 #'decoded-length-base85 - *base85-decode-table*)) - (register-descriptor-and-constructors :base85 (base85-format-descriptor) #'make-base85-encode-state #'make-base85-decode-state) diff --git a/octets.lisp b/octets.lisp index efa2ad0..42dd9eb 100644 --- a/octets.lisp +++ b/octets.lisp @@ -2,17 +2,6 @@ (cl:in-package :binascii) -(defgeneric decoding-tools (format &key case-fold map01) - (:documentation "Return three values: the basic decoding function for -FORMAT, a decoded-length function for FORMAT, and the decoding table for -FORMAT. CASE-FOLD is a generalized boolean indicating whether to -compare characters case-insensitively. MAP01 should be either NIL, -#\\I, or #\\L; if MAP01 is not NIL, then its value indicates what -character #\\1 maps to. If MAP01 is not NIL, then \\#0 maps to O. - -CASE-FOLD and MAP01 are silently ignored if they do not apply to -FORMAT.")) - (defun case-fold-decode-table (decode-table encode-table) (loop with table = (copy-seq decode-table) for c across encode-table @@ -37,41 +26,6 @@ FORMAT.")) (when errorp (error "Unsupported element-type ~A" element-type))))) -(defun decode-octets* (destination string decode-fun length-fun decode-table - start end decoded-length) - (declare (type function decode-fun length-fun)) - (let* ((end (or end (length string))) - ;; For better or worse, a provided decoded length from the user - ;; always wins. But LENGTH-FUN may do some additional - ;; validation of its own, so we want to make sure to call it - ;; too (the additional cost of doing so is negligible). - (guessed-length (funcall length-fun (- end start))) - (length (or decoded-length guessed-length))) - (declare (type index length)) - (etypecase destination - (null - (let* ((octets (make-array length :element-type '(unsigned-byte 8))) - (i -1) - (actual-length 0)) - (declare (type index actual-length)) - (funcall decode-fun string start end length decode-table - #'(lambda (o) - (setf (aref octets (incf i)) o) - (incf actual-length))) - (if (= actual-length length) - octets - ;; FIXME: if we wanted to dig into SBCL internals, there's - ;; a less consy way: (SB-KERNEL:%SHRINK-VECTOR). - (subseq octets 0 actual-length)))) - (stream - (funcall decode-fun string start end length decode-table - #'(lambda (o) (write-byte o destination))) - nil) - ((array (unsigned-byte 8) (*)) - (funcall decode-fun string start end length decode-table - #'(lambda (o) (vector-push-extend o destination))) - nil)))) - (declaim (inline array-data-and-offsets)) (defun array-data-and-offsets (v start end) "Like ARRAY-DISPLACEMENT, only more useful." @@ -150,16 +104,25 @@ any necessary padding required by FORMAT." ((array (unsigned-byte 8) (*)) (frob (fd-octets->octets/encode fd))))))) -#|| -(defun decode-to-fresh-vector (string state start end) +(defun decode-to-fresh-vector (string state start end decoded-length) (declare (type decode-state state)) (multiple-value-bind (input start end) (array-data-and-offsets string start end) (let* ((fd (state-descriptor state)) - (length (funcall (fd-decoded-length fd) (- end start)))) + (length (or decoded-length + (funcall (fd-decoded-length fd) (- end start))))) (declare (type format-descriptor fd)) (flet ((frob (v decode-fun) - (funcall decode-fun state v string 0 length start end))) + (multiple-value-bind (input-index output-index) + (funcall decode-fun state v input 0 length start end t) + ;; FIXME: we should check to see if we actually + ;; consumed all the input. If we didn't, then we need + ;; to reallocate V and continue decoding. Even though + ;; we said LASTP=T. Hmmm. + (declare (ignore input-index)) + (if (= output-index length) + v + (subseq v 0 output-index))))) (let ((octets (make-array length :element-type '(unsigned-byte 8)))) (etypecase string (simple-string @@ -167,12 +130,13 @@ any necessary padding required by FORMAT." (simple-octet-vector (frob octets (fd-octets->octets/decode fd))))))))) -(defun decode (string format &key (start 0) end case-fold map01) +(defun decode (string format &key (start 0) end case-fold map01 decoded-length) (decode-to-fresh-vector string (find-decoder format case-fold map01) - start end)) + start end decoded-length)) (defun decode-octets (destination string format &key (start 0) end - (output-start 0) output-end case-fold map01 finishp) + (output-start 0) output-end case-fold map01 finishp + decoded-length) "Decode the characters of STRING between START and END into octets according to FORMAT. DECODED-LENGTH indicates the number of decoded octets to expect. DESTINATION may be NIL." @@ -183,32 +147,18 @@ octets to expect. DESTINATION may be NIL." (array-data-and-offsets string start end) (multiple-value-bind (output output-start output-end) (array-data-and-offsets destination output-start output-end) - (funcall decode-fun format-start - output string + (funcall decode-fun state + output input output-start output-end - input-start input-end nil))))) + input-start input-end finishp))))) (declare (inline frob)) (etypecase string (null - (decode-to-fresh-vector string state start end)) + (decode-to-fresh-vector string state start end decoded-length)) (string - (frob (fd-string->octets (state-descriptor state))) + (frob (fd-string->octets (state-descriptor state)))) ((array (unsigned-byte 8) (*)) - (frob (fd-octets->octets/decode (state-descriptor state)))))))))g -||# - -(defun decode-octets (destination string format - &key (start 0) end decoded-length case-fold map01 - &allow-other-keys) - "Decode the characters of STRING between START and END into octets -according to FORMAT. DECODED-LENGTH indictes the number of decoded octets -to expect. DESTINATION may be NIL, an octet vector with a fill-pointer, -or a stream. DECODED-LENGTH does not need to be provided for all formats." - (declare (ignorable decoded-length)) - (multiple-value-bind (decode length table) - (decoding-tools format :case-fold case-fold :map01 map01) - (decode-octets* destination string decode length table - start end decoded-length))) + (frob (fd-octets->octets/decode (state-descriptor state)))))))) (defconstant +dt-invalid+ -1) @@ -230,35 +180,3 @@ or a stream. DECODED-LENGTH does not need to be provided for all formats." (if (>= i 256) +dt-invalid+ (aref table i))) - -;;; By controlling the inlinability of this function, we can specialize -;;; decode functions on simple arrays while going through full calls for -;;; non-simple arrays, thus providing a balance of -;;; performance/functionality/code size. -;;; -;;; Yes, it's a little ugly, but it's because INLINE/NOTINLINE doesn't -;;; work quite right on local functions in some implementations. -;;; Assuming the implementation supports INLINE/NOTINLINE correctly, -;;; this way should work. -(declaim (inline decode-dispatch-wrap)) -(defun decode-dispatch-wrap (decode-fun transform) - (funcall decode-fun transform)) -(declaim (notinline decode-dispatch-wrap)) - -(declaim (inline decode-dispatch)) -(defun decode-dispatch (v decode-fun) - (etypecase v - ;; Probably not worth optimizing for BASE-CHAR vs. CHARACTER. - (simple-string - (locally (declare (inline decode-dispatch-wrap)) - (decode-dispatch-wrap decode-fun #'char-code))) - ((simple-array (unsigned-byte 8) (*)) - (locally (declare (inline decode-dispatch-wrap)) - (decode-dispatch-wrap decode-fun #'identity))) - ;; We're not particularly worried about speed in these two cases. - (string - (locally (declare (notinline decode-dispatch-wrap)) - (decode-dispatch-wrap decode-fun #'char-code))) - ((array (unsigned-byte 8) (*)) - (locally (declare (notinline decode-dispatch-wrap)) - (decode-dispatch-wrap decode-fun #'identity))))) diff --git a/package.lisp b/package.lisp index e275641..27e5124 100644 --- a/package.lisp +++ b/package.lisp @@ -5,5 +5,5 @@ (:shadow simple-string) (:export #:encode-octets #:encode - #:decode-octets ;#:decode + #:decode-octets #:decode )) diff --git a/tests/tests.lisp b/tests/tests.lisp index 736eee2..d542ea6 100644 --- a/tests/tests.lisp +++ b/tests/tests.lisp @@ -46,8 +46,8 @@ STRING contains any character whose CHAR-CODE is greater than 255." (defun encoding-test* (name input encoded-output decoded-length) (let ((output (binascii:encode input name :end decoded-length)) - (decoded-input (binascii:decode-octets nil encoded-output name - :decoded-length decoded-length))) + (decoded-input (binascii:decode encoded-output name + :decoded-length decoded-length))) (when (mismatch output encoded-output) (error "encoding ~A failed on ~A, produced ~A, wanted ~A" name input output encoded-output)) @@ -56,7 +56,7 @@ STRING contains any character whose CHAR-CODE is greater than 255." name encoded-output decoded-input input)))) (defun encoding-test (name hexinput encoded-output &optional decoded-length) - (encoding-test* name (binascii:decode-octets nil hexinput :hex) + (encoding-test* name (binascii:decode hexinput :hex) encoded-output decoded-length)) (defun encoding-test-ascii (name ascii-input encoded-output From fe98b530ad893d93dde2f9c8da88e3a8205a2ef5 Mon Sep 17 00:00:00 2001 From: Nathan Froyd Date: Sat, 13 Feb 2010 14:54:23 -0500 Subject: [PATCH 65/76] fix ENCODE-TO-FRESH-VECTOR unreachable code --- octets.lisp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/octets.lisp b/octets.lisp index 42dd9eb..2c9acd4 100644 --- a/octets.lisp +++ b/octets.lisp @@ -56,7 +56,7 @@ (frob 'character (fd-octets->string fd))) (base-char (frob 'base-char (fd-octets->string fd))) - (ub8 + (octet (frob '(unsigned-byte 8) (fd-octets->octets/encode fd)))))))) (defun encode (octets format &key (start 0) end (element-type 'base-char)) From 799e37b492ac7607f28e24e80036b9ae6f03882d Mon Sep 17 00:00:00 2001 From: Nathan Froyd Date: Tue, 9 Feb 2010 22:28:02 -0500 Subject: [PATCH 66/76] note progress in TODO --- TODO | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/TODO b/TODO index 31ba129..a3d8b63 100644 --- a/TODO +++ b/TODO @@ -41,14 +41,24 @@ We are definitely going to over-allocate if we permit whitespace. Python's binascii module has a few we might consider adding. * convert to async API ** encode -*** TODO base16 -*** TODO base32 -*** TODO base64 -*** TODO base85 -*** TODO ascii85 +*** DONE base16 + CLOSED: [2010-02-09 Tue 22:26] +*** DONE base32 + CLOSED: [2010-02-09 Tue 22:26] +*** DONE base64 + CLOSED: [2010-02-09 Tue 22:27] +*** DONE base85 + CLOSED: [2010-02-09 Tue 22:27] +*** DONE ascii85 + CLOSED: [2010-02-09 Tue 22:27] ** decode -*** TODO base16 -*** TODO base32 -*** TODO base64 -*** TODO base85 -*** TODO ascii85 +*** DONE base16 + CLOSED: [2010-02-09 Tue 22:27] +*** DONE base32 + CLOSED: [2010-02-09 Tue 22:27] +*** DONE base64 + CLOSED: [2010-02-09 Tue 22:27] +*** DONE base85 + CLOSED: [2010-02-13 Sat 15:33] +*** DONE ascii85 + CLOSED: [2010-02-13 Sat 15:33] From 38fd49d97596cf16f38c8bd334ee1a64c0bcf2de Mon Sep 17 00:00:00 2001 From: Nathan Froyd Date: Sat, 13 Feb 2010 15:52:36 -0500 Subject: [PATCH 67/76] add (:PREDICATE NIL) for all defined structures --- ascii85.lisp | 2 ++ base16.lisp | 2 ++ base32.lisp | 2 ++ base64.lisp | 2 ++ base85.lisp | 2 ++ types.lisp | 3 +++ 6 files changed, 13 insertions(+) diff --git a/ascii85.lisp b/ascii85.lisp index 4ab2145..3e0c6b7 100644 --- a/ascii85.lisp +++ b/ascii85.lisp @@ -21,6 +21,7 @@ (defstruct (ascii85-encode-state (:include encode-state) (:copier nil) + (:predicate nil) (:constructor make-ascii85-encode-state (&aux (descriptor (ascii85-format-descriptor))))) (bits 0 :type (unsigned-byte 32)) @@ -156,6 +157,7 @@ (defstruct (ascii85-decode-state (:include decode-state) (:copier nil) + (:predicate nil) (:constructor %make-ascii85-decode-state (&aux (descriptor (ascii85-format-descriptor))))) (bits 0 :type (unsigned-byte 32)) diff --git a/base16.lisp b/base16.lisp index 22646ea..52132ec 100644 --- a/base16.lisp +++ b/base16.lisp @@ -27,6 +27,7 @@ (defstruct (base16-encode-state (:include encode-state) (:copier nil) + (:predicate nil) (:constructor make-base16-encode-state (&aux (descriptor (base16-format-descriptor)) (table *base16-encode-table*))) @@ -126,6 +127,7 @@ (defstruct (base16-decode-state (:include decode-state) (:copier nil) + (:predicate nil) (:constructor %make-base16-decode-state (table &aux (descriptor (base16-format-descriptor))))) diff --git a/base32.lisp b/base32.lisp index 6f448f9..74442cb 100644 --- a/base32.lisp +++ b/base32.lisp @@ -23,6 +23,7 @@ (defstruct (base32-encode-state (:include encode-state) (:copier nil) + (:predicate nil) (:constructor make-base32-encode-state (&aux (descriptor (base32-format-descriptor)) (table *base32-encode-table*))) @@ -148,6 +149,7 @@ (defstruct (base32-decode-state (:include decode-state) (:copier nil) + (:predicate nil) (:constructor %make-base32-decode-state (table &aux (descriptor (base32-format-descriptor))))) diff --git a/base64.lisp b/base64.lisp index 845d88a..069a54e 100644 --- a/base64.lisp +++ b/base64.lisp @@ -24,6 +24,7 @@ (defstruct (base64-encode-state (:include encode-state) (:copier nil) + (:predicate nil) (:constructor make-base64-encode-state (&aux (descriptor (base64-format-descriptor)) (table *base64-encode-table*))) @@ -172,6 +173,7 @@ (defstruct (base64-decode-state (:include decode-state) (:copier nil) + (:predicate nil) (:constructor %make-base64-decode-state (table &aux (descriptor (base64-format-descriptor))))) diff --git a/base85.lisp b/base85.lisp index a72189b..de5d948 100644 --- a/base85.lisp +++ b/base85.lisp @@ -21,6 +21,7 @@ (defstruct (base85-encode-state (:include encode-state) (:copier nil) + (:predicate nil) (:constructor make-base85-encode-state (&aux (descriptor (base85-format-descriptor))))) ;; TODO: Clever hack for little-endian machines: fill in GROUP @@ -160,6 +161,7 @@ (defstruct (base85-decode-state (:include decode-state) (:copier nil) + (:predicate nil) (:constructor %make-base85-decode-state (&aux (descriptor (base85-format-descriptor))))) (bits 0 :type (unsigned-byte 32)) diff --git a/types.lisp b/types.lisp index 00d4e3d..b74e09a 100644 --- a/types.lisp +++ b/types.lisp @@ -34,12 +34,14 @@ (defstruct (state (:copier nil) + (:predicate nil) (:constructor nil)) (descriptor (required-argument) :type format-descriptor :read-only t)) (defstruct (encode-state (:include state) (:copier nil) + (:predicate nil) (:constructor nil)) ;; LINE-BREAK describes after how many characters we should be ;; inserting newlines into the encoded output. It is zero if we @@ -52,6 +54,7 @@ (defstruct (decode-state (:include state) (:copier nil) + (:predicate nil) (:constructor nil)) ;; FINISHED-INPUT-P is either T or NIL depending on whether we have ;; seen all of the input to be encoded. From 2d6521eaf118f68678e4dba7ce5dbe320f3dbe31 Mon Sep 17 00:00:00 2001 From: Nathan Froyd Date: Sat, 13 Feb 2010 19:10:42 -0500 Subject: [PATCH 68/76] return next i/o index from encoder functions This is more consistent with what Common Lisp already does (e.g. READ-SEQUENCE), as well as being slightly more efficient. --- ascii85.lisp | 12 +++++------- base16.lisp | 12 +++++------- base32.lisp | 20 +++++++++----------- base64.lisp | 16 +++++++--------- base85.lisp | 12 +++++------- octets.lisp | 9 ++++++--- 6 files changed, 37 insertions(+), 44 deletions(-) diff --git a/ascii85.lisp b/ascii85.lisp index 3e0c6b7..24c1922 100644 --- a/ascii85.lisp +++ b/ascii85.lisp @@ -42,15 +42,13 @@ (declaim (inline ascii85-encode)) (defun ascii85-encoder (state output input - output-start output-end - input-start input-end lastp converter) + output-index output-end + input-index input-end lastp converter) (declare (type ascii85-encode-state state)) (declare (type simple-octet-vector input)) - (declare (type index output-start output-end input-start input-end)) + (declare (type index output-index output-end input-index input-end)) (declare (type function converter)) - (let ((input-index input-start) - (output-index output-start) - (bits (ascii85-encode-state-bits state)) + (let ((bits (ascii85-encode-state-bits state)) (pending (ascii85-encode-state-pending state)) (output-group (ascii85-encode-state-output-group state)) (output-pending (ascii85-encode-state-output-pending state))) @@ -133,7 +131,7 @@ (setf (ascii85-encode-state-bits state) bits (ascii85-encode-state-pending state) pending (ascii85-encode-state-output-pending state) output-pending)) - (values (- input-index input-start) (- output-index output-start))))) + (values input-index output-index)))) (defun octets->octets/encode/ascii85 (state output input output-start output-end diff --git a/base16.lisp b/base16.lisp index 52132ec..ad884b3 100644 --- a/base16.lisp +++ b/base16.lisp @@ -41,15 +41,13 @@ (declaim (inline base16-encoder)) (defun base16-encoder (state output input - output-start output-end - input-start input-end lastp converter) + output-index output-end + input-index input-end lastp converter) (declare (type base16-encode-state state)) (declare (type simple-octet-vector input)) - (declare (type index output-start output-end input-start input-end)) + (declare (type index output-index output-end input-index input-end)) (declare (type function converter)) - (let ((input-index input-start) - (output-index output-start) - (bits (base16-encode-state-bits state)) + (let ((bits (base16-encode-state-bits state)) (n-bits (base16-encode-state-n-bits state)) (table (base16-encode-state-table state))) (declare (type index input-index output-index)) @@ -98,7 +96,7 @@ RESTORE-STATE (setf (base16-encode-state-bits state) bits (base16-encode-state-n-bits state) n-bits)) - (values (- input-index input-start) (- output-index output-start)))) + (values input-index output-index))) (defun encoded-length/base16 (count) "Return the number of characters required to encode COUNT octets in Base16." diff --git a/base32.lisp b/base32.lisp index 74442cb..7af933d 100644 --- a/base32.lisp +++ b/base32.lisp @@ -38,19 +38,17 @@ (declaim (inline base32-encoder)) (defun base32-encoder (state output input - output-start output-end - input-start input-end lastp converter) + output-index output-end + input-index input-end lastp converter) (declare (type base32-encode-state state)) (declare (type simple-octet-vector input)) - (declare (type index output-start output-end input-start input-end)) + (declare (type index output-index output-end input-index input-end)) (declare (type function converter)) - (let* ((input-index input-start) - (output-index output-start) - (bits (base32-encode-state-bits state)) - (n-bits (base32-encode-state-n-bits state)) - (table (base32-encode-state-table state)) - (n-pad-chars #.(make-array 5 :initial-contents '(0 4 1 6 3) - :element-type 'fixnum))) + (let ((bits (base32-encode-state-bits state)) + (n-bits (base32-encode-state-n-bits state)) + (table (base32-encode-state-table state)) + (n-pad-chars #.(make-array 5 :initial-contents '(0 4 1 6 3) + :element-type 'fixnum))) (declare (type index input-index output-index)) (declare (type (unsigned-byte 16) bits)) @@ -112,7 +110,7 @@ RESTORE-STATE (setf (base32-encode-state-bits state) bits (base32-encode-state-n-bits state) n-bits)) - (values (- input-index input-start) (- output-index output-start)))) + (values input-index output-index))) (defun octets->octets/encode/base32 (state output input output-start output-end diff --git a/base64.lisp b/base64.lisp index 069a54e..880b7e0 100644 --- a/base64.lisp +++ b/base64.lisp @@ -38,17 +38,15 @@ (declaim (inline base64-encoder)) (defun base64-encoder (state output input - output-start output-end - input-start input-end lastp converter) + output-index output-end + input-index input-end lastp converter) (declare (type base64-encode-state state)) (declare (type simple-octet-vector input)) - (declare (type index output-start output-end input-start input-end)) + (declare (type index output-index output-end input-index input-end)) (declare (type function converter)) - (let* ((input-index input-start) - (output-index output-start) - (bits (base64-encode-state-bits state)) - (n-bits (base64-encode-state-n-bits state)) - (table (base64-encode-state-table state))) + (let ((bits (base64-encode-state-bits state)) + (n-bits (base64-encode-state-n-bits state)) + (table (base64-encode-state-table state))) (declare (type index input-index output-index)) (declare (type (unsigned-byte 16) bits)) (declare (type fixnum n-bits)) @@ -140,7 +138,7 @@ RESTORE-STATE (setf (base64-encode-state-bits state) bits (base64-encode-state-n-bits state) n-bits)) - (values (- input-index input-start) (- output-index output-start)))) + (values input-index output-index))) (defun octets->octets/encode/base64 (state output input output-start output-end diff --git a/base85.lisp b/base85.lisp index de5d948..373daa5 100644 --- a/base85.lisp +++ b/base85.lisp @@ -48,15 +48,13 @@ (declaim (inline base85-encode)) (defun base85-encoder (state output input - output-start output-end - input-start input-end lastp converter) + output-index output-end + input-index input-end lastp converter) (declare (type base85-encode-state state)) (declare (type simple-octet-vector input)) - (declare (type index output-start output-end input-start input-end)) + (declare (type index output-index output-end input-index input-end)) (declare (type function converter)) - (let ((input-index input-start) - (output-index output-start) - (bits (base85-encode-state-bits state)) + (let ((bits (base85-encode-state-bits state)) (pending (base85-encode-state-pending state)) (output-group (base85-encode-state-output-group state)) (output-pending (base85-encode-state-output-pending state)) @@ -137,7 +135,7 @@ (setf (base85-encode-state-bits state) bits (base85-encode-state-pending state) pending (base85-encode-state-output-pending state) output-pending)) - (values (- input-index input-start) (- output-index output-start))))) + (values input-index output-index)))) (defun octets->octets/encode/base85 (state output input output-start output-end diff --git a/octets.lisp b/octets.lisp index 2c9acd4..4c3e507 100644 --- a/octets.lisp +++ b/octets.lisp @@ -76,9 +76,12 @@ If ELEMENT-TYPE is a subtype of CHARACTER, then DESTINATION may also be a string. Similarly, if ELEMENT-TYPE is (UNSIGNED-BYTE 8) or an equivalent type, then DESTINATION may be an octet vector. In this case, OUTPUT-START and OUTPUT-END are used to determine the portion of -DESTINATION where the encoded output may be placed. The number of -octets encoded and the number of characters or bytes, respectively, -written are returned as multiple values. ELEMENT-TYPE is ignored. +DESTINATION where the encoded output may be placed. + +If DESTINATION is not NIL, The index of the first input element that was +not read and the index of the first output element that was not updated +are returned as multiple values. respectively, written are returned as +multiple values. ELEMENT-TYPE is ignored. If FINISHP is true, then in addition to any encoding of OCTETS, also output any necessary padding required by FORMAT." From d48bd52864f251d43ab4d0eed1dc5e1496565e45 Mon Sep 17 00:00:00 2001 From: Nathan Froyd Date: Sat, 13 Feb 2010 19:12:56 -0500 Subject: [PATCH 69/76] move FINISHED-INPUT-P to common superclass --- types.lisp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/types.lisp b/types.lisp index b74e09a..00765e1 100644 --- a/types.lisp +++ b/types.lisp @@ -36,7 +36,12 @@ (:copier nil) (:predicate nil) (:constructor nil)) - (descriptor (required-argument) :type format-descriptor :read-only t)) + (descriptor (required-argument) :type format-descriptor :read-only t) + ;; FINISHED-INPUT-P is either T or NIL depending on whether we have + ;; seen all of the input. + (finished-input-p nil) + ;; Likewise for FINISHED-OUTPUT-P. + (finished-output-p nil)) (defstruct (encode-state (:include state) @@ -46,16 +51,11 @@ ;; LINE-BREAK describes after how many characters we should be ;; inserting newlines into the encoded output. It is zero if we ;; should never insert newlines. - (line-break 0 :type (integer 0 *)) - ;; FINISHED-INPUT-P is either T or NIL depending on whether we have - ;; seen all of the input to be encoded. - (finished-input-p nil)) + (line-break 0 :type (integer 0 *))) (defstruct (decode-state (:include state) (:copier nil) (:predicate nil) (:constructor nil)) - ;; FINISHED-INPUT-P is either T or NIL depending on whether we have - ;; seen all of the input to be encoded. - (finished-input-p nil)) + ) From fd2cb10cacc3e20e67d0c90b5c1d22100a5d2977 Mon Sep 17 00:00:00 2001 From: Nathan Froyd Date: Sat, 13 Feb 2010 19:16:40 -0500 Subject: [PATCH 70/76] export format names from the BINASCII package --- package.lisp | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/package.lisp b/package.lisp index 27e5124..d1a5c19 100644 --- a/package.lisp +++ b/package.lisp @@ -6,4 +6,14 @@ (:export #:encode-octets #:encode #:decode-octets #:decode + + ;; Format names. + #:base16 + #:hex + #:base32 + #:base32hex + #:base64 + #:base64url + #:base85 + #:ascii85 )) From f4f53a85345ddf3fcbd3c9b59618439a22a74805 Mon Sep 17 00:00:00 2001 From: Nathan Froyd Date: Sat, 13 Feb 2010 22:13:38 -0500 Subject: [PATCH 71/76] optimize base32 and base64 encodings by properly declaring N-BITS --- base32.lisp | 4 ++-- base64.lisp | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/base32.lisp b/base32.lisp index 7af933d..de086be 100644 --- a/base32.lisp +++ b/base32.lisp @@ -31,7 +31,7 @@ (&aux (descriptor (base32-format-descriptor)) (table *base32hex-encode-table*)))) (bits 0 :type (unsigned-byte 16)) - (n-bits 0 :type fixnum) + (n-bits 0 :type (unsigned-byte 8)) (table *base32-encode-table* :read-only t :type (simple-array base-char (32))) (padding-remaining 0 :type (integer 0 6))) @@ -52,7 +52,7 @@ (declare (type index input-index output-index)) (declare (type (unsigned-byte 16) bits)) - (declare (type fixnum n-bits)) + (declare (type (unsigned-byte 8) n-bits)) (declare (type (simple-array fixnum (5)) n-pad-chars)) (tagbody PAD-CHECK diff --git a/base64.lisp b/base64.lisp index 880b7e0..7d0fc79 100644 --- a/base64.lisp +++ b/base64.lisp @@ -32,7 +32,7 @@ (&aux (descriptor (base64-format-descriptor)) (table *base64url-encode-table*)))) (bits 0 :type (unsigned-byte 16)) - (n-bits 0 :type fixnum) + (n-bits 0 :type (unsigned-byte 8)) (table *base64-encode-table* :read-only t :type (simple-array base-char (64))) (padding-remaining 0 :type (integer 0 3))) @@ -49,7 +49,7 @@ (table (base64-encode-state-table state))) (declare (type index input-index output-index)) (declare (type (unsigned-byte 16) bits)) - (declare (type fixnum n-bits)) + (declare (type (unsigned-byte 8) n-bits)) (tagbody PAD-CHECK (when (base64-encode-state-finished-input-p state) From c838cf4f6cb6051d4a44f6ff796dfcac078886fb Mon Sep 17 00:00:00 2001 From: Nathan Froyd Date: Sat, 13 Feb 2010 23:11:41 -0500 Subject: [PATCH 72/76] add ascii85 to testing regimen --- tests/tests.lisp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/tests.lisp b/tests/tests.lisp index d542ea6..8d4d4d6 100644 --- a/tests/tests.lisp +++ b/tests/tests.lisp @@ -69,7 +69,7 @@ STRING contains any character whose CHAR-CODE is greater than 255." (cons :encoding-ascii-test 'encoding-test-ascii))) (eval-when (:compile-toplevel :load-toplevel :execute) - (defparameter *encodings* '(:base16 :base32 :base32hex :base64 :base85))) + (defparameter *encodings* '(:base16 :base32 :base32hex :base64 :base85 :ascii85))) #.(loop for e in *encodings* collect `(rtest:deftest ,(intern (format nil "~A/TO-NIL/BASE-CHAR" e)) From c0aa36a44478d02a1ce98a8a6bde0e5eef1e4bfe Mon Sep 17 00:00:00 2001 From: Nathan Froyd Date: Sat, 13 Feb 2010 23:12:44 -0500 Subject: [PATCH 73/76] fix issues in ascii85 encoding and decoding --- ascii85.lisp | 46 ++++++++++++++++++++++++++++++---------------- 1 file changed, 30 insertions(+), 16 deletions(-) diff --git a/ascii85.lisp b/ascii85.lisp index 24c1922..88e486f 100644 --- a/ascii85.lisp +++ b/ascii85.lisp @@ -28,6 +28,7 @@ (pending 0 :type (integer 0 4)) (output-group (make-array 5 :element-type 'base-char) :read-only t :type (simple-array base-char (5))) + (group-index 0 :type (integer 0 4)) (output-pending 0 :type (integer 0 5)) (table *ascii85-encode-table* :read-only t :type (simple-array base-char (85)))) @@ -40,7 +41,7 @@ complete (+ complete r 1))))) -(declaim (inline ascii85-encode)) +(declaim (notinline ascii85-encoder)) (defun ascii85-encoder (state output input output-index output-end input-index input-end lastp converter) @@ -51,18 +52,19 @@ (let ((bits (ascii85-encode-state-bits state)) (pending (ascii85-encode-state-pending state)) (output-group (ascii85-encode-state-output-group state)) + (group-index (ascii85-encode-state-group-index state)) (output-pending (ascii85-encode-state-output-pending state))) (declare (type index input-index output-index)) (declare (type (unsigned-byte 32) bits)) (declare (type (integer 0 4) pending)) - (declare (type (integer 0 5) output-pending)) + (declare (type (integer 0 5) output-pending group-index)) (flet ((expand-for-output (bits output-group) (cond ((zerop bits) (setf (aref output-group 0) #\z) 1) (t - (loop for i from 45 downto 0 + (loop for i from 4 downto 0 do (multiple-value-bind (b index) (truncate bits 85) (setf bits b (aref output-group i) @@ -85,20 +87,24 @@ (incf pending) (go INPUT-CHECK)) EXPAND-FOR-OUTPUT - (setf output-pending (expand-for-output bits output-group)) + (setf output-pending (expand-for-output bits output-group) + group-index 0) OUTPUT-CHECK (when (>= output-index output-end) (go DONE)) DO-OUTPUT - (when (> output-pending 0) + (when (< group-index output-pending) (setf (aref output output-index) (funcall converter - (aref output-group (decf output-pending)))) + (aref output-group group-index))) + (incf group-index) (incf output-index) (cond - ((zerop output-pending) + ((= group-index output-pending) (setf bits 0) (setf pending 0) + (setf group-index 0) + (setf output-pending 0) (go INPUT-CHECK)) (t (go OUTPUT-CHECK)))) @@ -107,7 +113,7 @@ (go RESTORE-STATE)) (setf (ascii85-encode-state-finished-input-p state) t) (setf output-pending (expand-for-output bits output-group) - output-pending (1+ pending)) + group-index 0) FLUSH-BITS (when (zerop output-pending) (go RESTORE-STATE)) @@ -115,21 +121,25 @@ (when (>= output-index output-end) (go RESTORE-STATE)) DO-FLUSH-OUTPUT - (when (> output-pending 0) + (when (< group-index output-pending) (setf (aref output output-index) (funcall converter - (aref output-group (decf output-pending)))) + (aref output-group group-index))) + (incf group-index) (incf output-index) (cond - ((zerop output-pending) + ((= group-index output-pending) (setf bits 0) (setf pending 0) + (setf group-index 0) + (setf output-pending 0) (go RESTORE-STATE)) (t (go FLUSH-OUTPUT-CHECK)))) RESTORE-STATE (setf (ascii85-encode-state-bits state) bits (ascii85-encode-state-pending state) pending + (ascii85-encode-state-group-index state) group-index (ascii85-encode-state-output-pending state) output-pending)) (values input-index output-index)))) @@ -213,7 +223,7 @@ (v (funcall converter c)) (d (dtref table v))) (cond - ((eql c (funcall converter #\z)) + ((eql v (funcall converter #\z)) (unless (zerop pending) (error "z found in the middle of an ascii85 group")) (incf input-index) @@ -234,12 +244,16 @@ (unless lastp (go RESTORE-STATE)) (setf (ascii85-decode-state-finished-input-p state) t) - ;; We should *always* have a complete group or nothing at this - ;; point. EOT-VALIDITY-CHECK - (when (<= 1 pending 4) + (when (zerop pending) + (go RESTORE-STATE)) + (when (= pending 1) (error "invalid ascii85 input")) - (setf output-pending (if (zerop pending) 0 4)) + (dotimes (i (- 5 pending)) + (setf bits (+ (* bits 85) 84))) + (setf output-pending (1- pending) + bits (ldb (byte (* output-pending 8) (* (- 4 output-pending) 8)) + bits)) FLUSH-BITS (when (zerop output-pending) (go RESTORE-STATE)) From 30295a5015f9417bd5f5600e565cac9d0cbf6a11 Mon Sep 17 00:00:00 2001 From: Nathan Froyd Date: Sat, 13 Feb 2010 23:13:09 -0500 Subject: [PATCH 74/76] return a shorter vector from ENCODE-TO-FRESH-VECTOR if necessary --- octets.lisp | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/octets.lisp b/octets.lisp index 4c3e507..948af3f 100644 --- a/octets.lisp +++ b/octets.lisp @@ -47,9 +47,13 @@ (declare (type format-descriptor fd)) (flet ((frob (etype encode-fun) (let ((v (make-array length :element-type etype))) - (funcall encode-fun state v input - 0 length start end t) - v))) + (multiple-value-bind (input-index output-index) + (funcall encode-fun state v input + 0 length start end t) + (declare (ignore input-index)) + (if (= output-index length) + v + (subseq v 0 output-index)))))) (declare (inline frob)) (ecase (canonicalize-element-type element-type) (character From a123e91d0363d576a52e255abc7a05557b06ab07 Mon Sep 17 00:00:00 2001 From: Nathan Froyd Date: Mon, 15 Feb 2010 14:13:43 -0500 Subject: [PATCH 75/76] add tests for distinct element types Make sure we run tests that exercise the :ELEMENT-TYPE keyword argument to ENCODE. Also make sure that DECODE can handle inputs that aren't necessarily strings. --- tests/tests.lisp | 40 +++++++++++++++++++++++++++++++--------- 1 file changed, 31 insertions(+), 9 deletions(-) diff --git a/tests/tests.lisp b/tests/tests.lisp index 8d4d4d6..8792145 100644 --- a/tests/tests.lisp +++ b/tests/tests.lisp @@ -44,11 +44,26 @@ STRING contains any character whose CHAR-CODE is greater than 255." (apply test-function name (cdr form))))) finally (return t))))) +(defvar *coding-output-element-type* 'base-char) + (defun encoding-test* (name input encoded-output decoded-length) - (let ((output (binascii:encode input name :end decoded-length)) - (decoded-input (binascii:decode encoded-output name - :decoded-length decoded-length))) - (when (mismatch output encoded-output) + (let* ((output (binascii:encode input name :end decoded-length + :element-type *coding-output-element-type*)) + (mismatchable-encoded-output + (cond + ((or (eql *coding-output-element-type* 'base-char) + (eql *coding-output-element-type* 'character)) + encoded-output) + ((equal *coding-output-element-type* '(unsigned-byte 8)) + (ascii-string-to-octets encoded-output)) + (t + (error "unknown value for *CODING-OUTPUT-ELEMENT-TYPE* ~A" + *coding-output-element-type*)))) + (decoded-input (binascii:decode mismatchable-encoded-output name + :decoded-length decoded-length))) + (unless (typep output `(array ,*coding-output-element-type* (*))) + (error "encoded output not of proper type")) + (when (mismatch output mismatchable-encoded-output) (error "encoding ~A failed on ~A, produced ~A, wanted ~A" name input output encoded-output)) (when (mismatch input decoded-input :end1 decoded-length :end2 decoded-length) @@ -71,8 +86,15 @@ STRING contains any character whose CHAR-CODE is greater than 255." (eval-when (:compile-toplevel :load-toplevel :execute) (defparameter *encodings* '(:base16 :base32 :base32hex :base64 :base85 :ascii85))) -#.(loop for e in *encodings* - collect `(rtest:deftest ,(intern (format nil "~A/TO-NIL/BASE-CHAR" e)) - (run-test-vector-file ,e *encoding-tests*) - t) into forms - finally (return `(progn ,@forms))) +#.(flet ((deftest-form (e eltype) + (let ((pretty-name (if (equal eltype '(unsigned-byte 8)) + 'ub8 + eltype))) + `(rtest:deftest ,(intern (format nil "~A/TO-NIL/~A" e pretty-name)) + (let ((*coding-output-element-type* ',eltype)) + (run-test-vector-file ,e *encoding-tests*)) + t)))) + (loop for e in *encodings* + append (loop for eltype in '(base-char character (unsigned-byte 8)) + collect (deftest-form e eltype)) into forms + finally (return `(progn ,@forms)))) From 87fa2cd7e12e5b6b654455e19db8b83bba35b5a5 Mon Sep 17 00:00:00 2001 From: Nathan Froyd Date: Mon, 15 Feb 2010 14:26:43 -0500 Subject: [PATCH 76/76] fix formats that explicitly test for particular characters base32, base64, and ascii85 all need to check for particular characters: CONVERTER logic in the decoding functions was misinterpreted for this case. CONVERTER is really for turning elements of the input sequence into integers and was inappropriately being used to try to convert the special characters into integers, which didn't work when decoding octet vectors. For those checks, explicitly check the type of sequence we're decoding from and take the appropriate transformation on the special character. --- ascii85.lisp | 4 +++- base32.lisp | 9 +++++++-- base64.lisp | 9 +++++++-- 3 files changed, 17 insertions(+), 5 deletions(-) diff --git a/ascii85.lisp b/ascii85.lisp index 88e486f..a6e8b12 100644 --- a/ascii85.lisp +++ b/ascii85.lisp @@ -223,7 +223,9 @@ (v (funcall converter c)) (d (dtref table v))) (cond - ((eql v (funcall converter #\z)) + ((eql v (if (typep input 'simple-octet-vector) + (char-code #\z) + (funcall converter #\z))) (unless (zerop pending) (error "z found in the middle of an ascii85 group")) (incf input-index) diff --git a/base32.lisp b/base32.lisp index de086be..36d5198 100644 --- a/base32.lisp +++ b/base32.lisp @@ -200,7 +200,9 @@ (let* ((c (aref input input-index)) (v (funcall converter c)) (d (dtref table v))) - (when (= v (funcall converter #\=)) + (when (= v (if (typep input 'simple-octet-vector) + (char-code #\=) + (funcall converter #\=))) (go SAW-EQUAL)) (when (= d +dt-invalid+) (error "invalid base32 character ~A at position ~D" c input-index)) @@ -244,7 +246,10 @@ (go RESTORE-STATE)) EAT-EQUAL (let ((v (aref input input-index))) - (unless (= (funcall converter v) (funcall converter #\=)) + (unless (= (funcall converter v) + (if (typep input 'simple-octet-vector) + (char-code #\=) + (funcall converter #\=))) (error "invalid base32 input ~A at position ~D" v input-index)) (incf input-index) (decf padding-remaining) diff --git a/base64.lisp b/base64.lisp index 7d0fc79..3cf1639 100644 --- a/base64.lisp +++ b/base64.lisp @@ -225,7 +225,9 @@ (let* ((c (aref input input-index)) (v (funcall converter c)) (d (dtref table v))) - (when (= v (funcall converter #\=)) + (when (= v (if (typep input 'simple-octet-vector) + (char-code #\=) + (funcall converter #\=))) (go SAW-EQUAL)) (when (= d +dt-invalid+) (error "invalid base64 character ~A at position ~D" c input-index)) @@ -253,7 +255,10 @@ (go RESTORE-STATE)) EAT-EQUAL (let ((v (aref input input-index))) - (unless (= (funcall converter v) (funcall converter #\=)) + (unless (= (funcall converter v) + (if (typep input 'simple-octet-vector) + (char-code #\=) + (funcall converter #\=))) (error "invalid base64 input ~A at position ~D" v input-index)) (incf input-index) (decf padding-remaining)