diff --git a/TODO b/TODO index 6a5404d..a3d8b63 100644 --- a/TODO +++ b/TODO @@ -7,8 +7,9 @@ that paradigm up with variable-sized groups. * fix ascii85 encoding Must SHRINK-VECTOR on NIL destination in case we encoded 'z'. * line-breaking encoded output -This should be easy; just pass a slightly different closure into this -main encoding function. +This is probably really only doable for encoding to freshly-consed vectors. +Certainly it's much more doable for clients with the upcoming async +interface. * compiler macros for optimizing constant-format versions of encode/decode The trick will be to do this without adding too much bloat. Will probably only do this for a NIL destination, as that's the case where we @@ -27,7 +28,37 @@ Trying to do minimal allocation; might have to just suck it up and admit that we are going to over-allocate fairly often. Especially in the ascii85 case, over-allocation is just a fact of life. +* figure out how to make the library fast for non-CMUCL/SBCL +We might just have to accept a bit of code bloat everyplace else, by +duplicating logic/functions for simple vs. non-simple. +I don't want to restrict clients to always using simple arrays everywhere. + +CCL looks like it ought to be able to get around this, but I can't find +the magic call. * ignoring whitespace when decoding We are definitely going to over-allocate if we permit whitespace. * other formats Python's binascii module has a few we might consider adding. +* convert to async API +** encode +*** DONE base16 + CLOSED: [2010-02-09 Tue 22:26] +*** DONE base32 + CLOSED: [2010-02-09 Tue 22:26] +*** DONE base64 + CLOSED: [2010-02-09 Tue 22:27] +*** DONE base85 + CLOSED: [2010-02-09 Tue 22:27] +*** DONE ascii85 + CLOSED: [2010-02-09 Tue 22:27] +** decode +*** DONE base16 + CLOSED: [2010-02-09 Tue 22:27] +*** DONE base32 + CLOSED: [2010-02-09 Tue 22:27] +*** DONE base64 + CLOSED: [2010-02-09 Tue 22:27] +*** DONE base85 + CLOSED: [2010-02-13 Sat 15:33] +*** DONE ascii85 + CLOSED: [2010-02-13 Sat 15:33] diff --git a/ascii85.lisp b/ascii85.lisp index 7cd707a..a6e8b12 100644 --- a/ascii85.lisp +++ b/ascii85.lisp @@ -5,7 +5,35 @@ (defvar *ascii85-encode-table* #.(coerce "!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstu" 'simple-base-string)) -(defun encoded-length-ascii85 (count) +(defun ascii85-format-descriptor () + (let* ((cell (load-time-value (list nil))) + (fd (car cell))) + (if fd + fd + (setf (car cell) + (make-format-descriptor #'encoded-length/ascii85 + #'octets->string/ascii85 + #'octets->octets/encode/ascii85 + #'decoded-length-ascii85 + #'string->octets/ascii85 + #'octets->octets/decode/ascii85))))) + +(defstruct (ascii85-encode-state + (:include encode-state) + (:copier nil) + (:predicate nil) + (:constructor make-ascii85-encode-state + (&aux (descriptor (ascii85-format-descriptor))))) + (bits 0 :type (unsigned-byte 32)) + (pending 0 :type (integer 0 4)) + (output-group (make-array 5 :element-type 'base-char) + :read-only t :type (simple-array base-char (5))) + (group-index 0 :type (integer 0 4)) + (output-pending 0 :type (integer 0 5)) + (table *ascii85-encode-table* :read-only t + :type (simple-array base-char (85)))) + +(defun encoded-length/ascii85 (count) "Return the number of characters required to encode COUNT octets in Ascii85." (multiple-value-bind (q r) (truncate count 4) (let ((complete (* q 5))) @@ -13,91 +41,266 @@ complete (+ complete r 1))))) -(defun encode-octets-ascii85 (octets start end table writer) - (declare (type (simple-array (unsigned-byte 8) (*)) octets)) - (declare (type index start end)) - (declare (type function writer)) - (declare (ignore table)) - (flet ((output (buffer group count) - (if (zerop group) - (funcall writer #\z) - (loop for i from 4 downto 0 - do (multiple-value-bind (g b) (truncate group 85) - (setf group g - (aref buffer i) (code-char (+ #.(char-code #\!) b)))) - finally (dotimes (i (1+ count)) - (funcall writer (aref buffer i))))))) - (loop with length = (- end start) - with buffer = (make-string 5) - with group of-type (unsigned-byte 32) = 0 - with count of-type fixnum = 0 - with shift of-type fixnum = 24 - until (zerop length) - do (setf group (logior group (ash (aref octets start) shift))) - (incf start) - (decf length) - (decf shift 8) - (when (= (incf count) 4) - (output buffer group count) - (setf group 0 count 0 shift 24)) - finally (unless (zerop count) - (output buffer group count))))) - -(defmethod encoding-tools ((format (eql :ascii85))) - (values #'encode-octets-ascii85 #'encoded-length-ascii85 - *ascii85-encode-table*)) +(declaim (notinline ascii85-encoder)) +(defun ascii85-encoder (state output input + output-index output-end + input-index input-end lastp converter) + (declare (type ascii85-encode-state state)) + (declare (type simple-octet-vector input)) + (declare (type index output-index output-end input-index input-end)) + (declare (type function converter)) + (let ((bits (ascii85-encode-state-bits state)) + (pending (ascii85-encode-state-pending state)) + (output-group (ascii85-encode-state-output-group state)) + (group-index (ascii85-encode-state-group-index state)) + (output-pending (ascii85-encode-state-output-pending state))) + (declare (type index input-index output-index)) + (declare (type (unsigned-byte 32) bits)) + (declare (type (integer 0 4) pending)) + (declare (type (integer 0 5) output-pending group-index)) + (flet ((expand-for-output (bits output-group) + (cond + ((zerop bits) + (setf (aref output-group 0) #\z) + 1) + (t + (loop for i from 4 downto 0 + do (multiple-value-bind (b index) (truncate bits 85) + (setf bits b + (aref output-group i) + (code-char (+ #.(char-code #\!) index)))) + finally (return 5)))))) + (tagbody + PAD-CHECK + (when (ascii85-encode-state-finished-input-p state) + (go FLUSH-BITS)) + INPUT-CHECK + (when (>= input-index input-end) + (go DONE)) + DO-INPUT + (when (< pending 4) + (setf bits (ldb (byte 32 0) + (logior (ash (aref input input-index) + (- 24 (* pending 8))) + bits))) + (incf input-index) + (incf pending) + (go INPUT-CHECK)) + EXPAND-FOR-OUTPUT + (setf output-pending (expand-for-output bits output-group) + group-index 0) + OUTPUT-CHECK + (when (>= output-index output-end) + (go DONE)) + DO-OUTPUT + (when (< group-index output-pending) + (setf (aref output output-index) + (funcall converter + (aref output-group group-index))) + (incf group-index) + (incf output-index) + (cond + ((= group-index output-pending) + (setf bits 0) + (setf pending 0) + (setf group-index 0) + (setf output-pending 0) + (go INPUT-CHECK)) + (t + (go OUTPUT-CHECK)))) + DONE + (unless lastp + (go RESTORE-STATE)) + (setf (ascii85-encode-state-finished-input-p state) t) + (setf output-pending (expand-for-output bits output-group) + group-index 0) + FLUSH-BITS + (when (zerop output-pending) + (go RESTORE-STATE)) + FLUSH-OUTPUT-CHECK + (when (>= output-index output-end) + (go RESTORE-STATE)) + DO-FLUSH-OUTPUT + (when (< group-index output-pending) + (setf (aref output output-index) + (funcall converter + (aref output-group group-index))) + (incf group-index) + (incf output-index) + (cond + ((= group-index output-pending) + (setf bits 0) + (setf pending 0) + (setf group-index 0) + (setf output-pending 0) + (go RESTORE-STATE)) + (t + (go FLUSH-OUTPUT-CHECK)))) + RESTORE-STATE + (setf (ascii85-encode-state-bits state) bits + (ascii85-encode-state-pending state) pending + (ascii85-encode-state-group-index state) group-index + (ascii85-encode-state-output-pending state) output-pending)) + (values input-index output-index)))) + +(defun octets->octets/encode/ascii85 (state output input + output-start output-end + input-start input-end lastp) + (declare (type simple-octet-vector output)) + (declare (optimize speed)) + (ascii85-encoder state output input output-start output-end + input-start input-end lastp #'char-code)) + +(defun octets->string/ascii85 (state output input + output-start output-end + input-start input-end lastp) + (declare (type simple-string output)) + (declare (optimize speed)) + (ascii85-encoder state output input output-start output-end + input-start input-end lastp #'identity)) (defvar *ascii85-decode-table* (make-decode-table *ascii85-encode-table*)) (declaim (type decode-table *ascii85-decode-table*)) +(defstruct (ascii85-decode-state + (:include decode-state) + (:copier nil) + (:predicate nil) + (:constructor %make-ascii85-decode-state + (&aux (descriptor (ascii85-format-descriptor))))) + (bits 0 :type (unsigned-byte 32)) + (pending 0 :type (integer 0 5)) + (output-pending 0 :type (integer 0 4)) + (table *ascii85-decode-table* :read-only t :type decode-table)) + +(defun make-ascii85-decode-state (case-fold map01) + (declare (ignore case-fold map01)) + (%make-ascii85-decode-state)) + +(defun ascii85-decoder (state output input + output-index output-end + input-index input-end lastp converter) + (declare (type ascii85-decode-state state)) + (declare (type simple-octet-vector output)) + (declare (type index output-index output-end input-index input-end)) + (declare (type function converter)) + (let ((bits (ascii85-decode-state-bits state)) + (pending (ascii85-decode-state-pending state)) + (output-pending (ascii85-decode-state-output-pending state)) + (table (ascii85-decode-state-table state))) + (declare (type (unsigned-byte 32) bits)) + (declare (type (integer 0 5) pending)) + (declare (type (integer 0 4) output-pending)) + (tagbody + FINISHED-CHECK + (when (ascii85-decode-state-finished-input-p state) + (go FLUSH-BITS)) + OUTPUT-AVAILABLE-CHECK + (when (zerop output-pending) + (go INPUT-AVAILABLE-CHECK)) + OUTPUT-SPACE-CHECK + (when (>= output-index output-end) + (go DONE)) + DO-OUTPUT + (setf (aref output output-index) + (ldb (byte 8 (* (decf output-pending) 8)) bits)) + (incf output-index) + (cond + ((zerop output-pending) + (setf bits 0) + (setf pending 0) + (setf output-pending 0) + (go INPUT-AVAILABLE-CHECK)) + (t + (go OUTPUT-SPACE-CHECK))) + INPUT-AVAILABLE-CHECK + (when (>= input-index input-end) + (go DONE)) + DO-INPUT + (cond + ((< pending 5) + (let* ((c (aref input input-index)) + (v (funcall converter c)) + (d (dtref table v))) + (cond + ((eql v (if (typep input 'simple-octet-vector) + (char-code #\z) + (funcall converter #\z))) + (unless (zerop pending) + (error "z found in the middle of an ascii85 group")) + (incf input-index) + (setf output-pending 4) + (go OUTPUT-SPACE-CHECK)) + ((= d +dt-invalid+) + (error "invalid ascii85 character ~A at position ~D" c input-index)) + (t + ;; FIXME: check for overflow. + (setf bits (+ (* bits 85) d)) + (incf pending) + (incf input-index) + (go INPUT-AVAILABLE-CHECK))))) + (t + (setf output-pending 4) + (go OUTPUT-SPACE-CHECK))) + DONE + (unless lastp + (go RESTORE-STATE)) + (setf (ascii85-decode-state-finished-input-p state) t) + EOT-VALIDITY-CHECK + (when (zerop pending) + (go RESTORE-STATE)) + (when (= pending 1) + (error "invalid ascii85 input")) + (dotimes (i (- 5 pending)) + (setf bits (+ (* bits 85) 84))) + (setf output-pending (1- pending) + bits (ldb (byte (* output-pending 8) (* (- 4 output-pending) 8)) + bits)) + FLUSH-BITS + (when (zerop output-pending) + (go RESTORE-STATE)) + FLUSH-OUTPUT-CHECK + (when (>= output-index output-end) + (go RESTORE-STATE)) + DO-FLUSH-OUTPUT + (when (> output-pending 0) + (setf (aref output output-index) + (ldb (byte 8 (* (decf output-pending) 8)) bits)) + (incf output-index) + (cond + ((zerop output-pending) + (setf bits 0) + (setf pending 0) + (setf output-pending 0) + (go RESTORE-STATE)) + (t + (go FLUSH-OUTPUT-CHECK)))) + RESTORE-STATE + (setf (ascii85-decode-state-bits state) bits + (ascii85-decode-state-pending state) pending + (ascii85-decode-state-output-pending state) output-pending)) + (values input-index output-index))) + +(defun string->octets/ascii85 (state output input + output-index output-end + input-index input-end lastp) + (declare (type simple-string input)) + (ascii85-decoder state output input output-index output-end + input-index input-end lastp #'char-code)) + +(defun octets->octets/decode/ascii85 (state output input + output-index output-end + input-index input-end lastp) + (declare (type simple-octet-vector input)) + (ascii85-decoder state output input output-index output-end + input-index input-end lastp #'identity)) + (defun decoded-length-ascii85 (length) ;; FIXME: There's nothing smart we can do without scanning the string. ;; We have to assume the worst case, that all the characters in the ;; string are #\z. (* length 5)) -(defun decode-octets-ascii85 (string start end length table writer) - (declare (type index start end)) - (declare (type function writer)) - (declare (type decode-table table)) - (flet ((do-decode (transform) - (do ((i 0) - (acc 0)) - ((>= start end) - (unless (zerop i) - (when (= i 1) - (error "corrupt ascii85 group")) - (dotimes (j (- 5 i)) - (setf acc (+ (* acc 85) 84))) - (dotimes (j (1- i)) - (funcall writer (ldb (byte 8 (* (- 3 j) 8)) acc))))) - (cond - ((>= i 5) - (unless (< acc (ash 1 32)) - (error "invalid ascii85 sequence")) - (dotimes (i 4) - (funcall writer (ldb (byte 8 (* (- 3 i) 8)) acc))) - (setf i 0 - acc 0)) - (t - (let* ((b (funcall transform (aref string start))) - (d (dtref table b))) - (incf start) - (cond - ((= b #.(char-code #\z)) - (unless (zerop i) - (error "z found in the middle of an ascii85 group")) - (dotimes (i 4) - (funcall writer 0))) - ((= d +dt-invalid+) - (error "invalid ascii85 character ~X" b)) - (t - (incf i) - (setf acc (+ (* acc 85) d)))))))))) - (declare (inline do-decode)) - (decode-dispatch string #'do-decode))) - -(defmethod decoding-tools ((format (eql :ascii85)) &key case-fold map01) - (declare (ignorable case-fold map01)) - (values #'decode-octets-ascii85 #'decoded-length-ascii85 - *ascii85-decode-table*)) +(register-descriptor-and-constructors :ascii85 (ascii85-format-descriptor) + #'make-ascii85-encode-state + #'make-ascii85-decode-state) diff --git a/base16.lisp b/base16.lisp index 0502202..ad884b3 100644 --- a/base16.lisp +++ b/base16.lisp @@ -11,64 +11,225 @@ (make-decode-table *base16-encode-table*)) (declaim (type decode-table *base16-decode-table*)) -(defun encoded-length-base16 (count) +(defun base16-format-descriptor () + (let* ((cell (load-time-value (list nil))) + (fd (car cell))) + (if fd + fd + (setf (car cell) + (make-format-descriptor #'encoded-length/base16 + #'octets->string/base16 + #'octets->octets/encode/base16 + #'decoded-length-base16 + #'string->octets/base16 + #'octets->octets/decode/base16))))) + +(defstruct (base16-encode-state + (:include encode-state) + (:copier nil) + (:predicate nil) + (:constructor make-base16-encode-state + (&aux (descriptor (base16-format-descriptor)) + (table *base16-encode-table*))) + (:constructor make-hex-encode-state + (&aux (descriptor (base16-format-descriptor)) + (table *hex-encode-table*)))) + (bits 0 :type (unsigned-byte 8)) + (n-bits 0 :type fixnum) + (table *base16-encode-table* :read-only t + :type (simple-array base-char (16)))) + +(declaim (inline base16-encoder)) +(defun base16-encoder (state output input + output-index output-end + input-index input-end lastp converter) + (declare (type base16-encode-state state)) + (declare (type simple-octet-vector input)) + (declare (type index output-index output-end input-index input-end)) + (declare (type function converter)) + (let ((bits (base16-encode-state-bits state)) + (n-bits (base16-encode-state-n-bits state)) + (table (base16-encode-state-table state))) + (declare (type index input-index output-index)) + (declare (type (unsigned-byte 8) bits)) + (declare (type fixnum n-bits)) + (tagbody + PAD-CHECK + (when (base16-encode-state-finished-input-p state) + (go FLUSH-BITS)) + INPUT-CHECK + (when (>= input-index input-end) + (go DONE)) + DO-INPUT + (when (zerop n-bits) + (setf bits (aref input input-index)) + (incf input-index) + (setf n-bits 8)) + OUTPUT-CHECK + (when (>= output-index output-end) + (go DONE)) + DO-OUTPUT + (decf n-bits 4) + (setf (aref output output-index) + (funcall converter (aref table (ldb (byte 4 n-bits) bits)))) + (incf output-index) + (if (>= n-bits 4) + (go OUTPUT-CHECK) + (go INPUT-CHECK)) + DONE + (unless lastp + (go RESTORE-STATE)) + (setf (base16-encode-state-finished-input-p state) t) + FLUSH-BITS + (when (zerop n-bits) + (go RESTORE-STATE)) + FLUSH-OUTPUT-CHECK + (when (>= output-index output-end) + (go RESTORE-STATE)) + DO-FLUSH-OUTPUT + (decf n-bits 4) + (setf (aref output output-index) + (funcall converter (aref table (ldb (byte 4 n-bits) bits)))) + (incf output-index) + (when (= n-bits 4) + (go FLUSH-OUTPUT-CHECK)) + RESTORE-STATE + (setf (base16-encode-state-bits state) bits + (base16-encode-state-n-bits state) n-bits)) + (values input-index output-index))) + +(defun encoded-length/base16 (count) "Return the number of characters required to encode COUNT octets in Base16." (* count 2)) -(defun encode-octets-base16 (octets start end table writer) - (declare (type (simple-array (unsigned-byte 8) (*)) octets)) - (declare (type index start end)) - (declare (type function writer)) - (declare (type (simple-array base-char (16)) table)) - (loop for i from start below end - do (let ((byte (aref octets i))) - (funcall writer (aref table (ldb (byte 4 4) byte))) - (funcall writer (aref table (ldb (byte 4 0) byte)))))) - -(defmethod encoding-tools ((format (eql :base16))) - (values #'encode-octets-base16 #'encoded-length-base16 - *base16-encode-table*)) - -(defmethod encoding-tools ((format (eql :hex))) - (values #'encode-octets-base16 #'encoded-length-base16 - *hex-encode-table*)) - -(defun decode-octets-base16 (string start end length table writer) - (declare (type index start end)) - (declare (type function writer)) - (declare (type decode-table table)) - (declare (optimize (speed 3))) - (flet ((do-decode (transform) - (loop for i from start below end by 2 - do (let* ((char1 (aref string i)) - (char2 (aref string (1+ i))) - (v1 (dtref table (funcall transform char1))) - (v2 (dtref table (funcall transform char2)))) - (when (= v1 +dt-invalid+) - (error "Invalid hex digit ~A" char1)) - (when (= v2 +dt-invalid+) - (error "Invalid hex digit ~A" char2)) - (funcall writer (+ (* v1 16) v2)))))) - (declare (inline do-decode)) - (decode-dispatch string #'do-decode))) +(defun octets->octets/encode/base16 (state output input + output-start output-end + input-start input-end lastp) + (declare (type simple-octet-vector output)) + (base16-encoder state output input output-start output-end + input-start input-end lastp #'char-code)) + +(defun octets->string/base16 (state output input + output-start output-end + input-start input-end lastp) + (declare (type simple-string output)) + (base16-encoder state output input output-start output-end + input-start input-end lastp #'identity)) + +(defun base16-decode-table (case-fold) + (if case-fold + (case-fold-decode-table *base16-decode-table* + *base16-encode-table*) + *base16-decode-table*)) + +(defstruct (base16-decode-state + (:include decode-state) + (:copier nil) + (:predicate nil) + (:constructor %make-base16-decode-state + (table + &aux (descriptor (base16-format-descriptor))))) + (bits 0 :type (unsigned-byte 8)) + (n-bits 0 :type fixnum) + (table *base16-decode-table* :read-only t :type decode-table)) + +(defun make-base16-decode-state (case-fold map01) + (declare (ignore map01)) + (%make-base16-decode-state (base16-decode-table case-fold))) + +(defun make-hex-decode-state (case-fold map01) + (declare (ignore case-fold map01)) + (%make-base16-decode-state (base16-decode-table t))) + +(defun base16-decoder (state output input + output-index output-end + input-index input-end lastp converter) + (declare (type base16-decode-state state)) + (declare (type simple-octet-vector output)) + (declare (type index output-index output-end input-index input-end)) + (declare (type function converter)) + (let ((bits (base16-decode-state-bits state)) + (n-bits (base16-decode-state-n-bits state)) + (table (base16-decode-state-table state))) + (declare (type (unsigned-byte 8) bits)) + (tagbody + START + (when (base16-decode-state-finished-input-p state) + (go FLUSH-BITS)) + OUTPUT-AVAILABLE-CHECK + (when (< n-bits 8) + (go INPUT-AVAILABLE-CHECK)) + OUTPUT-SPACE-CHECK + (when (>= output-index output-end) + (go DONE)) + DO-OUTPUT + (setf (aref output output-index) bits + bits 0 + n-bits 0) + (incf output-index) + (go INPUT-AVAILABLE-CHECK) + INPUT-AVAILABLE-CHECK + (when (>= input-index input-end) + (go DONE)) + DO-INPUT + (assert (< n-bits 8)) + (let* ((v (aref input input-index)) + (c (dtref table (funcall converter v)))) + (when (= c +dt-invalid+) + (error "invalid hex digit ~A at position ~D" v input-index)) + (incf input-index) + (cond + ((= n-bits 0) + (setf bits (* (logand c #xf) 16) + n-bits 4) + (go INPUT-AVAILABLE-CHECK)) + ((= n-bits 4) + (setf bits (+ bits (logand c #xf)) + n-bits 8) + (go OUTPUT-SPACE-CHECk)))) + DONE + (unless lastp + (go RESTORE-STATE)) + (setf (base16-decode-state-finished-input-p state) t) + FLUSH-BITS + (when (zerop n-bits) + (go RESTORE-STATE)) + FLUSH-OUTPUT-CHECK + (when (>= output-index output-end) + (go RESTORE-STATE)) + DO-FLUSH-OUTPUT + (when (= n-bits 4) + (error "attempting to decode an odd number of hex digits")) + (setf (aref output output-index) bits + bits 0 + n-bits 0) + RESTORE-STATE + (setf (base16-decode-state-n-bits state) n-bits + (base16-decode-state-bits state) bits)) + (values input-index output-index))) + +(defun string->octets/base16 (state output input + output-index output-end + input-index input-end lastp) + (declare (type simple-string input)) + (base16-decoder state output input output-index output-end + input-index input-end lastp #'char-code)) + +(defun octets->octets/decode/base16 (state output input + output-index output-end + input-index input-end lastp) + (declare (type simple-octet-vector input)) + (base16-decoder state output input output-index output-end + input-index input-end lastp #'identity)) (defun decoded-length-base16 (length) (unless (evenp length) (error "cannot decode an odd number of base16 characters")) (truncate length 2)) -(defmethod decoding-tools ((format (eql :base16)) &key case-fold map01) - (declare (ignorable case-fold map01)) - (values #'decode-octets-base16 - #'decoded-length-base16 - (if case-fold - (case-fold-decode-table *base16-decode-table* - *base16-encode-table*) - *base16-decode-table*))) - -(defmethod decoding-tools ((format (eql :hex)) &key case-fold map01) - (declare (ignorable case-fold map01)) - (values #'decode-octets-base16 - #'decoded-length-base16 - (case-fold-decode-table *base16-decode-table* - *base16-encode-table*))) +(register-descriptor-and-constructors :base16 (base16-format-descriptor) + #'make-base16-encode-state + #'make-base16-decode-state) +(register-descriptor-and-constructors :hex (base16-format-descriptor) + #'make-hex-encode-state + #'make-hex-decode-state) diff --git a/base32.lisp b/base32.lisp index 388979e..36d5198 100644 --- a/base32.lisp +++ b/base32.lisp @@ -7,100 +7,283 @@ (defvar *base32hex-encode-table* #.(coerce "0123456789ABCDEFGHIJKLMNOPQRSTUV" 'simple-base-string)) -(defun encode-octets-base32 (octets start end table writer) - (declare (type (simple-array (unsigned-byte 8) (*)) octets)) - (declare (type index start end)) - (declare (type function writer)) - (declare (type (simple-array base-char (32)) table)) - (loop for i from start below end - for bits of-type (unsigned-byte 16) = (aref octets i) - then (ldb (byte 16 0) (logior (ash bits 8) (aref octets i))) - for n-bits of-type fixnum = 8 then (+ n-bits 8) - do (loop while (>= n-bits 5) - do (decf n-bits 5) - (funcall writer (aref table (ldb (byte 5 n-bits) bits)))) - finally (let ((n-pad - (case n-bits - (3 - (funcall writer (aref table - (ash (ldb (byte 3 0) bits) 2))) - 6) - (1 - (funcall writer (aref table - (ash (ldb (byte 1 0) bits) 4))) - 4) - (4 - (funcall writer (aref table - (ash (ldb (byte 4 0) bits) 1))) - 3) - (2 - (funcall writer (aref table - (ash (ldb (byte 2 0) bits) 3))) - 1) - (otherwise 0)))) - (dotimes (i n-pad) - (funcall writer #\=))))) - -(defun encoded-length-base32 (count) - "Return the number of characters required to encode COUNT octets in Base32." - (* (ceiling count 5) 8)) +(defun base32-format-descriptor () + (let* ((cell (load-time-value (list nil))) + (fd (car cell))) + (if fd + fd + (setf (car cell) + (make-format-descriptor #'encoded-length/base32 + #'octets->string/base32 + #'octets->octets/encode/base32 + #'decoded-length-base32 + #'string->octets/base32 + #'octets->octets/decode/base32))))) + +(defstruct (base32-encode-state + (:include encode-state) + (:copier nil) + (:predicate nil) + (:constructor make-base32-encode-state + (&aux (descriptor (base32-format-descriptor)) + (table *base32-encode-table*))) + (:constructor make-base32hex-encode-state + (&aux (descriptor (base32-format-descriptor)) + (table *base32hex-encode-table*)))) + (bits 0 :type (unsigned-byte 16)) + (n-bits 0 :type (unsigned-byte 8)) + (table *base32-encode-table* :read-only t + :type (simple-array base-char (32))) + (padding-remaining 0 :type (integer 0 6))) + +(declaim (inline base32-encoder)) +(defun base32-encoder (state output input + output-index output-end + input-index input-end lastp converter) + (declare (type base32-encode-state state)) + (declare (type simple-octet-vector input)) + (declare (type index output-index output-end input-index input-end)) + (declare (type function converter)) + (let ((bits (base32-encode-state-bits state)) + (n-bits (base32-encode-state-n-bits state)) + (table (base32-encode-state-table state)) + (n-pad-chars #.(make-array 5 :initial-contents '(0 4 1 6 3) + :element-type 'fixnum))) + + (declare (type index input-index output-index)) + (declare (type (unsigned-byte 16) bits)) + (declare (type (unsigned-byte 8) n-bits)) + (declare (type (simple-array fixnum (5)) n-pad-chars)) + (tagbody + PAD-CHECK + (when (base32-encode-state-finished-input-p state) + (go PAD)) + INPUT-CHECK + (when (>= input-index input-end) + (go DONE)) + DO-INPUT + (when (< n-bits 5) + (setf bits (ldb (byte 16 0) + (logior (ash bits 8) (aref input input-index)))) + (incf input-index) + (incf n-bits 8)) + OUTPUT-CHECK + (when (>= output-index output-end) + (go DONE)) + DO-OUTPUT + (decf n-bits 5) + (setf (aref output output-index) + (funcall converter (aref table (ldb (byte 5 n-bits) bits)))) + (incf output-index) + (if (>= n-bits 5) + (go OUTPUT-CHECK) + (go INPUT-CHECK)) + DONE + (unless lastp + (go RESTORE-STATE)) + (setf (base32-encode-state-finished-input-p state) t) + (setf (base32-encode-state-padding-remaining state) + (aref n-pad-chars n-bits)) + PAD + (locally (declare (type (integer 0 4) n-bits)) + (let ((padding-remaining (base32-encode-state-padding-remaining state)) + (max-pad-chars (aref n-pad-chars n-bits))) + (declare (type (integer 0 6) padding-remaining)) + (declare (type (integer 0 6) max-pad-chars)) + (when (and (= padding-remaining max-pad-chars) + (< output-index output-end)) + (setf (aref output output-index) + (funcall converter + (aref table (ash (ldb (byte n-bits 0) bits) + (- 5 n-bits))))) + (incf output-index) + (decf padding-remaining)) + (when (< padding-remaining max-pad-chars) + (loop while (and (>= padding-remaining 0) + (< output-index output-end)) + do (setf (aref output output-index) (funcall converter #\=)) + (incf output-index) + (decf padding-remaining))) + (when (zerop padding-remaining) + (setf n-bits 0)) + (setf (base32-encode-state-padding-remaining state) padding-remaining))) + RESTORE-STATE + (setf (base32-encode-state-bits state) bits + (base32-encode-state-n-bits state) n-bits)) + (values input-index output-index))) -(defmethod encoding-tools ((format (eql :base32))) - (values #'encode-octets-base32 #'encoded-length-base32 - *base32-encode-table*)) +(defun octets->octets/encode/base32 (state output input + output-start output-end + input-start input-end lastp) + (declare (type simple-octet-vector output)) + (declare (optimize speed)) + (base32-encoder state output input output-start output-end + input-start input-end lastp #'char-code)) -(defmethod encoding-tools ((format (eql :base32hex))) - (values #'encode-octets-base32 #'encoded-length-base32 - *base32hex-encode-table*)) +(defun octets->string/base32 (state output input + output-start output-end + input-start input-end lastp) + (declare (type simple-string output)) + (declare (optimize speed)) + (base32-encoder state output input output-start output-end + input-start input-end lastp #'identity)) (defvar *base32-decode-table* (make-decode-table *base32-encode-table*)) (defvar *base32hex-decode-table* (make-decode-table *base32hex-encode-table*)) (declaim (type decode-table *base32-decode-table* *base32hex-decode-table*)) -(defun decode-octets-base32 (string start end length table writer) - (declare (type index start end)) - (declare (type decode-table table)) - (declare (type function writer)) - (flet ((do-decode (transform) - (loop with bits of-type (unsigned-byte 16) = 0 - with n-bits of-type (unsigned-byte 8) = 0 - for i from start below end - for char = (aref string i) - for value = (dtref table (funcall transform char)) - do (cond - ((>= value 0) - (setf bits (logand (logior (ash bits 5) value) #xffff)) - (incf n-bits 5) - (when (>= n-bits 8) - (decf n-bits 8) - (funcall writer (logand (ash bits (- n-bits)) #xff)) - (setf bits (logand bits #xff)))) - ((eql (funcall transform char) - (funcall transform #\=))) - (t - (error "bad character ~A in base32 decoding" char)))))) - (declare (inline do-decode)) - (decode-dispatch string #'do-decode))) +(defun base32-decode-table (case-fold map01) + (let ((table *base32-decode-table*)) + (when map01 + (setf table (copy-seq table)) + (setf (aref table (char-code #\0)) (aref table (char-code #\O))) + (case map01 + ((#\I #\L) (setf (aref table (char-code #\1)) + (aref table (char-code map01)))))) + (when case-fold + (setf table (case-fold-decode-table table *base32-encode-table*))) + table)) + +(defstruct (base32-decode-state + (:include decode-state) + (:copier nil) + (:predicate nil) + (:constructor %make-base32-decode-state + (table + &aux (descriptor (base32-format-descriptor))))) + (bits 0 :type (unsigned-byte 16)) + (n-bits 0 :type (unsigned-byte 8)) + (padding-remaining 0 :type (integer 0 6)) + (table *base32-decode-table* :read-only t :type decode-table)) + +(defun make-base32-decode-state (case-fold map01) + (%make-base32-decode-state (base32-decode-table case-fold map01))) + +(defun make-base32hex-decode-state (case-fold map01) + (declare (ignore case-fold map01)) + (%make-base32-decode-state *base32hex-decode-table*)) + +(defun base32-decoder (state output input + output-index output-end + input-index input-end lastp converter) + (declare (type base32-decode-state state)) + (declare (type simple-octet-vector output)) + (declare (type index output-index output-end input-index input-end)) + (declare (type function converter)) + (let ((bits (base32-decode-state-bits state)) + (n-bits (base32-decode-state-n-bits state)) + (padding-remaining (base32-decode-state-padding-remaining state)) + (table (base32-decode-state-table state))) + (declare (type (unsigned-byte 16) bits)) + (declare (type fixnum n-bits)) + (declare (type (integer 0 6) padding-remaining)) + (tagbody + PAD-CHECK + (when (base32-decode-state-finished-input-p state) + (go EAT-EQUAL-CHECK-PAD)) + OUTPUT-AVAILABLE-CHECK + (when (< n-bits 8) + (go INPUT-AVAILABLE-CHECK)) + OUTPUT-SPACE-CHECK + (when (>= output-index output-end) + (go DONE)) + DO-OUTPUT + (decf n-bits 8) + (setf (aref output output-index) (logand (ash bits (- n-bits)) #xff) + bits (logand bits #xff)) + (incf output-index) + (go INPUT-AVAILABLE-CHECK) + INPUT-AVAILABLE-CHECK + (when (>= input-index input-end) + (go DONE)) + DO-INPUT + (let* ((c (aref input input-index)) + (v (funcall converter c)) + (d (dtref table v))) + (when (= v (if (typep input 'simple-octet-vector) + (char-code #\=) + (funcall converter #\=))) + (go SAW-EQUAL)) + (when (= d +dt-invalid+) + (error "invalid base32 character ~A at position ~D" c input-index)) + (incf input-index) + (setf bits (ldb (byte 16 0) (logior (ash bits 5) d))) + (incf n-bits 5) + (go OUTPUT-AVAILABLE-CHECK)) + DONE + (unless lastp + (go RESTORE-STATE)) + SAW-EQUAL + (setf (base32-decode-state-finished-input-p state) t) + ;; A complete base32 group is: + ;; + ;; vvvvvvvv wwwwwwww xxxxxxxx yyyyyyyy zzzzzzzz + ;; + ;; which gets encoded by: + ;; + ;; vvvvv vvvww wwwww wxxxx xxxxy yyyyy yyzzz zzzzz + ;; + ;; so the intermediate bits left are: 3 1 4 2 0 + ;; corresponding to padding amounts : 6 4 3 1 0 (in characters) + ;; + ;; but we also have to handle cases where we start padding too + ;; soon: we can't handle padding after seeing 1 group of 5, 3 + ;; groups of 5, 4 groups of 5 or 6 groups of five. those + ;; correspond to 5 bits remaining (having not seen the 3 v's), 7 + ;; bits remaining (having not seen the 1 w), 4 bits remaining + ;; (having not seen the 4 x's), and 6 bits remaining (having not + ;; seen the 2 y's). + (let ((n-pad-chars #.(make-array 5 :initial-contents '(0 4 1 6 3) + :element-type 'fixnum))) + (if (<= n-bits 4) + (setf padding-remaining (aref n-pad-chars n-bits)) + (error "invalid base32 input"))) + EAT-EQUAL-CHECK-PAD + (when (zerop padding-remaining) + (go RESTORE-STATE)) + EAT-EQUAL-CHECK-INPUT + (when (>= input-index input-end) + (go RESTORE-STATE)) + EAT-EQUAL + (let ((v (aref input input-index))) + (unless (= (funcall converter v) + (if (typep input 'simple-octet-vector) + (char-code #\=) + (funcall converter #\=))) + (error "invalid base32 input ~A at position ~D" v input-index)) + (incf input-index) + (decf padding-remaining) + (go EAT-EQUAL-CHECK-PAD)) + RESTORE-STATE + (setf (base32-decode-state-n-bits state) n-bits + (base32-decode-state-bits state) bits + (base32-decode-state-padding-remaining state) padding-remaining)) + (values input-index output-index))) + +(defun string->octets/base32 (state output input + output-index output-end + input-index input-end lastp) + (declare (type simple-string input)) + (base32-decoder state output input output-index output-end + input-index input-end lastp #'char-code)) + +(defun octets->octets/decode/base32 (state output input + output-index output-end + input-index input-end lastp) + (declare (type simple-octet-vector input)) + (base32-decoder state output input output-index output-end + input-index input-end lastp #'identity)) + +(defun encoded-length/base32 (count) + "Return the number of characters required to encode COUNT octets in Base32." + (* (ceiling count 5) 8)) (defun decoded-length-base32 (length) (* (ceiling length 8) 5)) -(defmethod decoding-tools ((format (eql :base32)) &key case-fold map01) - (declare (ignorable case-fold map01)) - (values #'decode-octets-base32 - #'decoded-length-base32 - (let ((table *base32-decode-table*)) - (when map01 - (setf table (copy-seq table)) - (setf (aref table (char-code #\0)) (aref table (char-code #\O))) - (case map01 - ((#\I #\L) (setf (aref table (char-code #\1)) - (aref table (char-code map01)))))) - (when case-fold - (setf table (case-fold-decode-table table *base32-encode-table*))) - table))) - -(defmethod decoding-tools ((format (eql :base32hex)) &key case-fold map01) - (declare (ignorable case-fold map01)) - (values #'decode-octets-base32 #'decoded-length-base32 - *base32hex-decode-table*)) +(register-descriptor-and-constructors :base32 (base32-format-descriptor) + #'make-base32-encode-state + #'make-base32-decode-state) +(register-descriptor-and-constructors :base32hex (base32-format-descriptor) + #'make-base32hex-encode-state + #'make-base32hex-decode-state) diff --git a/base64.lisp b/base64.lisp index 1ac0f4a..3cf1639 100644 --- a/base64.lisp +++ b/base64.lisp @@ -8,39 +8,157 @@ (defvar *base64url-encode-table* #.(coerce "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_" 'simple-base-string)) -(defun encode-octets-base64 (octets start end table writer) - (declare (type (simple-array (unsigned-byte 8) (*)) octets)) - (declare (type index start end)) - (declare (type function writer)) - (declare (type (simple-array base-char (64)) table)) - (loop - for i from start below end - for bits of-type (unsigned-byte 16) = (aref octets i) - then (ldb (byte 16 0) (logior (ash bits 8) (aref octets i))) - for n-bits of-type fixnum = 8 then (+ n-bits 8) - do (loop while (>= n-bits 6) - do (decf n-bits 6) - (funcall writer (aref table (ldb (byte 6 n-bits) bits)))) - finally (cond - ((= n-bits 2) - (funcall writer (aref table (ash (ldb (byte 2 0) bits) 4))) - (funcall writer #\=) - (funcall writer #\=)) - ((= n-bits 4) - (funcall writer (aref table (ash (ldb (byte 4 0) bits) 2))) - (funcall writer #\=))))) - -(defun encoded-length-base64 (count) - "Return the number of characters required to encode COUNT octets in Base64." - (* (ceiling count 3) 4)) +(defun base64-format-descriptor () + (let* ((cell (load-time-value (list nil))) + (fd (car cell))) + (if fd + fd + (setf (car cell) + (make-format-descriptor #'encoded-length/base64 + #'octets->string/base64 + #'octets->octets/encode/base64 + #'decoded-length-base64 + #'string->octets/base64 + #'octets->octets/decode/base64))))) + +(defstruct (base64-encode-state + (:include encode-state) + (:copier nil) + (:predicate nil) + (:constructor make-base64-encode-state + (&aux (descriptor (base64-format-descriptor)) + (table *base64-encode-table*))) + (:constructor make-base64url-encode-state + (&aux (descriptor (base64-format-descriptor)) + (table *base64url-encode-table*)))) + (bits 0 :type (unsigned-byte 16)) + (n-bits 0 :type (unsigned-byte 8)) + (table *base64-encode-table* :read-only t :type (simple-array base-char (64))) + (padding-remaining 0 :type (integer 0 3))) + +(declaim (inline base64-encoder)) +(defun base64-encoder (state output input + output-index output-end + input-index input-end lastp converter) + (declare (type base64-encode-state state)) + (declare (type simple-octet-vector input)) + (declare (type index output-index output-end input-index input-end)) + (declare (type function converter)) + (let ((bits (base64-encode-state-bits state)) + (n-bits (base64-encode-state-n-bits state)) + (table (base64-encode-state-table state))) + (declare (type index input-index output-index)) + (declare (type (unsigned-byte 16) bits)) + (declare (type (unsigned-byte 8) n-bits)) + (tagbody + PAD-CHECK + (when (base64-encode-state-finished-input-p state) + (go PAD)) + INPUT-CHECK + (when (>= input-index input-end) + (go DONE)) + DO-INPUT + (when (< n-bits 6) + (setf bits (ldb (byte 16 0) + (logior (ash bits 8) (aref input input-index)))) + (incf input-index) + (incf n-bits 8)) + OUTPUT-CHECK + (when (>= output-index output-end) + (go DONE)) + DO-OUTPUT + (decf n-bits 6) + (setf (aref output output-index) + (funcall converter (aref table (ldb (byte 6 n-bits) bits)))) + (incf output-index) + (if (>= n-bits 6) + (go OUTPUT-CHECK) + (go INPUT-CHECK)) + DONE + (unless lastp + (go RESTORE-STATE)) + (setf (base64-encode-state-finished-input-p state) t) + (cond + ((= n-bits 2) + (setf (base64-encode-state-padding-remaining state) 3)) + ((= n-bits 4) + (setf (base64-encode-state-padding-remaining state) 2))) + PAD + (cond + ((or (zerop n-bits) + (zerop (base64-encode-state-padding-remaining state))) + (go RESTORE-STATE)) + ((= n-bits 2) + (go DO-PAD-FOR-2-BITS)) + ((= n-bits 4) + (go DO-PAD-FOR-4-BITS))) + DO-PAD-FOR-2-BITS + (let ((padding-remaining (base64-encode-state-padding-remaining state))) + (declare (type (integer 0 3) padding-remaining)) + (when (and (>= padding-remaining 3) + (< output-index output-end)) + (setf (aref output output-index) + (funcall converter + (aref table (ash (ldb (byte 2 0) bits) 4)))) + (incf output-index) + (decf padding-remaining)) + (when (and (>= padding-remaining 2) + (< output-index output-end)) + (setf (aref output output-index) (funcall converter #\=)) + (incf output-index) + (decf padding-remaining)) + (when (and (>= padding-remaining 1) + (< output-index output-end)) + (setf (aref output output-index) (funcall converter #\=)) + (incf output-index) + (decf padding-remaining)) + (when (zerop padding-remaining) + (setf n-bits 0)) + (setf (base64-encode-state-padding-remaining state) padding-remaining) + (go RESTORE-STATE)) + DO-PAD-FOR-4-BITS + (let ((padding-remaining (base64-encode-state-padding-remaining state))) + (declare (type (integer 0 3) padding-remaining)) + (when (and (>= padding-remaining 2) + (< output-index output-end)) + (setf (aref output output-index) + (funcall converter + (aref table (ash (ldb (byte 4 0) bits) 2)))) + (incf output-index) + (decf padding-remaining)) + (when (and (>= padding-remaining 1) + (< output-index output-end)) + (setf (aref output output-index) (funcall converter #\=)) + (incf output-index) + (decf padding-remaining)) + (when (zerop padding-remaining) + (setf n-bits 0)) + (setf (base64-encode-state-padding-remaining state) padding-remaining) + (go RESTORE-STATE)) + RESTORE-STATE + (setf (base64-encode-state-bits state) bits + (base64-encode-state-n-bits state) n-bits)) + (values input-index output-index))) -(defmethod encoding-tools ((format (eql :base64))) - (values #'encode-octets-base64 #'encoded-length-base64 - *base64-encode-table*)) +(defun octets->octets/encode/base64 (state output input + output-start output-end + input-start input-end lastp) + (declare (type simple-octet-vector output)) + (declare (optimize speed)) + (base64-encoder state output input output-start output-end + input-start input-end lastp #'char-code)) -(defmethod encoding-tools ((format (eql :base64url))) - (values #'encode-octets-base64 #'encoded-length-base64 - *base64url-encode-table*)) +(defun octets->string/base64 (state output input + output-start output-end + input-start input-end lastp) + (declare (type simple-string output)) + (declare (optimize speed)) + (base64-encoder state output input output-start output-end + input-start input-end lastp #'identity)) + +(defun encoded-length/base64 (count) + "Return the number of characters required to encode COUNT octets in Base64." + (* (ceiling count 3) 4)) (defvar *base64-decode-table* (make-decode-table *base64-encode-table*)) @@ -50,40 +168,127 @@ (make-decode-table *base64url-encode-table*)) (declaim (type decode-table *base64url-decode-table*)) -(defun decode-octets-base64 (string start end length table writer) - (declare (type index start end)) - (declare (type function writer)) - (declare (type decode-table table)) - (flet ((do-decode (transform) - (loop with bits of-type (unsigned-byte 16) = 0 - with n-bits of-type (unsigned-byte 8) = 0 - for i from start below end - for char = (aref string i) - for value = (dtref table (funcall transform char)) - do (cond - ((>= value 0) - (setf bits (logand (logior (ash bits 6) value) #xffff)) - (incf n-bits 6) - (when (>= n-bits 8) - (decf n-bits 8) - (funcall writer (logand (ash bits (- n-bits)) #xff)) - (setf bits (logand bits #xff)))) - ((eql (funcall transform char) - (funcall transform #\=))) - ((= value +dt-invalid+) - (error "bad character ~A in base64 decoding" char)))))) - (declare (inline do-decode)) - (decode-dispatch string #'do-decode))) +(defstruct (base64-decode-state + (:include decode-state) + (:copier nil) + (:predicate nil) + (:constructor %make-base64-decode-state + (table + &aux (descriptor (base64-format-descriptor))))) + (bits 0 :type (unsigned-byte 16)) + (n-bits 0 :type (unsigned-byte 8)) + (padding-remaining 0 :type (integer 0 3)) + (table *base64-decode-table* :read-only t :type decode-table)) + +(defun make-base64-decode-state (case-fold map01) + (declare (ignore case-fold map01)) + (%make-base64-decode-state *base64-decode-table*)) + +(defun make-base64url-decode-state (case-fold map01) + (declare (ignore case-fold map01)) + (%make-base64-decode-state *base64url-decode-table*)) + +(defun base64-decoder (state output input + output-index output-end + input-index input-end lastp converter) + (declare (type base64-decode-state state)) + (declare (type simple-octet-vector output)) + (declare (type index output-index output-end input-index input-end)) + (declare (type function converter)) + (let ((bits (base64-decode-state-bits state)) + (n-bits (base64-decode-state-n-bits state)) + (padding-remaining (base64-decode-state-padding-remaining state)) + (table (base64-decode-state-table state))) + (declare (type (unsigned-byte 16) bits)) + (declare (type fixnum n-bits)) + (declare (type (integer 0 6) padding-remaining)) + (tagbody + PAD-CHECK + (when (base64-decode-state-finished-input-p state) + (go EAT-EQUAL-CHECK-PAD)) + OUTPUT-AVAILABLE-CHECK + (when (< n-bits 8) + (go INPUT-AVAILABLE-CHECK)) + OUTPUT-SPACE-CHECK + (when (>= output-index output-end) + (go DONE)) + DO-OUTPUT + (decf n-bits 8) + (setf (aref output output-index) (logand (ash bits (- n-bits)) #xff) + bits (logand bits #xff)) + (incf output-index) + (go INPUT-AVAILABLE-CHECK) + INPUT-AVAILABLE-CHECK + (when (>= input-index input-end) + (go DONE)) + DO-INPUT + (let* ((c (aref input input-index)) + (v (funcall converter c)) + (d (dtref table v))) + (when (= v (if (typep input 'simple-octet-vector) + (char-code #\=) + (funcall converter #\=))) + (go SAW-EQUAL)) + (when (= d +dt-invalid+) + (error "invalid base64 character ~A at position ~D" c input-index)) + (incf input-index) + (setf bits (ldb (byte 16 0) (logior (ash bits 6) d))) + (incf n-bits 6) + (go OUTPUT-AVAILABLE-CHECK)) + DONE + (unless lastp + (go RESTORE-STATE)) + SAW-EQUAL + (setf (base64-decode-state-finished-input-p state) t) + (cond + ((zerop n-bits) + (go RESTORE-STATE)) + ((= n-bits 2) + (setf padding-remaining 3)) + ((= n-bits 4) + (setf padding-remaining 2))) + EAT-EQUAL-CHECK-PAD + (when (zerop padding-remaining) + (go RESTORE-STATE)) + EAT-EQUAL-CHECK-INPUT + (when (>= input-index input-end) + (go RESTORE-STATE)) + EAT-EQUAL + (let ((v (aref input input-index))) + (unless (= (funcall converter v) + (if (typep input 'simple-octet-vector) + (char-code #\=) + (funcall converter #\=))) + (error "invalid base64 input ~A at position ~D" v input-index)) + (incf input-index) + (decf padding-remaining) + (go EAT-EQUAL-CHECK-PAD)) + RESTORE-STATE + (setf (base64-decode-state-n-bits state) n-bits + (base64-decode-state-bits state) bits + (base64-decode-state-padding-remaining state) padding-remaining)) + (values input-index output-index))) + +(defun string->octets/base64 (state output input + output-index output-end + input-index input-end lastp) + (declare (type simple-string input)) + (base64-decoder state output input output-index output-end + input-index input-end lastp #'char-code)) + +(defun octets->octets/decode/base64 (state output input + output-index output-end + input-index input-end lastp) + (declare (type simple-octet-vector input)) + (base64-decoder state output input output-index output-end + input-index input-end lastp #'identity)) (defun decoded-length-base64 (length) (* (ceiling length 4) 3)) -(defmethod decoding-tools ((format (eql :base64)) &key case-fold map01) - (declare (ignorable case-fold map01)) - (values #'decode-octets-base64 #'decoded-length-base64 - *base64-decode-table*)) - -(defmethod decoding-tools ((format (eql :base64url)) &key case-fold map01) - (declare (ignorable case-fold map01)) - (values #'decode-octets-base64 #'decoded-length-base64 - *base64url-decode-table*)) +(register-descriptor-and-constructors :base64 (base64-format-descriptor) + #'make-base64-encode-state + #'make-base64-decode-state) +(register-descriptor-and-constructors :base64url (base64-format-descriptor) + #'make-base64url-encode-state + #'make-base64url-decode-state) diff --git a/base85.lisp b/base85.lisp index 90e4766..373daa5 100644 --- a/base85.lisp +++ b/base85.lisp @@ -5,69 +5,283 @@ (defvar *base85-encode-table* #.(coerce "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz!#$%&()*+-;<=>?@^_`{|}~" 'simple-base-string)) -(defun encoded-length-base85 (count) +(defun base85-format-descriptor () + (let* ((cell (load-time-value (list nil))) + (fd (car cell))) + (if fd + fd + (setf (car cell) + (make-format-descriptor #'encoded-length/base85 + #'octets->string/base85 + #'octets->octets/encode/base85 + #'decoded-length-base85 + #'string->octets/base85 + #'octets->octets/decode/base85))))) + +(defstruct (base85-encode-state + (:include encode-state) + (:copier nil) + (:predicate nil) + (:constructor make-base85-encode-state + (&aux (descriptor (base85-format-descriptor))))) + ;; TODO: Clever hack for little-endian machines: fill in GROUP + ;; back-to-front, using PENDING to count down, then use SBCL's + ;; %VECTOR-RAW-BITS or similar to read out the group in proper + ;; big-endian order. We could even do the same thing on x86-64 if we + ;; made the buffer bigger. + ;; + ;; For now, though, we'll fill GROUP front-to-back and PENDING will + ;; indicate how many octets we've filled in. + #+nil + (group (make-array 4 :element-type '(unsigned-byte 8)) + :read-only t :type (simple-array (unsigned-byte 8) (4))) + (bits 0 :type (unsigned-byte 32)) + (pending 0 :type (integer 0 4)) + (output-group (make-array 5 :element-type 'base-char) + :read-only t :type (simple-array base-char (5))) + (output-pending 0 :type (integer 0 5)) + (table *base85-encode-table* :read-only t :type (simple-array base-char (85)))) + +(defun encoded-length/base85 (count) "Return the number of characters required to encode COUNT octets in Base85." (* (ceiling count 4) 5)) -(defun encode-octets-base85 (octets start end table writer) - (declare (type (simple-array (unsigned-byte 8) (*)) octets)) - (declare (type index start end)) - (declare (type (simple-array base-char (85)) table)) - (declare (type function writer)) - (loop with length = (- end start) - with buffer = (make-string 5) - while (plusp length) - do (let ((group (do ((g 0) - (i 24 (- i 8))) - ((or (zerop length) (< i 0)) g) - (setf g (logior (ash (aref octets start) i) g)) - (incf start) - (decf length)))) - (loop for i from 4 downto 0 - do (multiple-value-bind (g b) (truncate group 85) - (setf group g - (aref buffer i) (aref table b))) - finally (dotimes (i 5) - (funcall writer (aref buffer i))))))) - -(defmethod encoding-tools ((format (eql :base85))) - (values #'encode-octets-base85 #'encoded-length-base85 - *base85-encode-table*)) +(declaim (inline base85-encode)) +(defun base85-encoder (state output input + output-index output-end + input-index input-end lastp converter) + (declare (type base85-encode-state state)) + (declare (type simple-octet-vector input)) + (declare (type index output-index output-end input-index input-end)) + (declare (type function converter)) + (let ((bits (base85-encode-state-bits state)) + (pending (base85-encode-state-pending state)) + (output-group (base85-encode-state-output-group state)) + (output-pending (base85-encode-state-output-pending state)) + (table (base85-encode-state-table state))) + (declare (type index input-index output-index)) + (declare (type (unsigned-byte 32) bits)) + (declare (type (integer 0 4) pending)) + (declare (type (integer 0 5) output-pending)) + (flet ((expand-for-output (bits output-group) + (loop for i from 0 to 4 + do (multiple-value-bind (b index) (truncate bits 85) + (setf bits b + (aref output-group i) (aref table index))) + finally (setf output-pending 5)))) + (declare (inline expand-for-output)) + (tagbody + PAD-CHECK + (when (base85-encode-state-finished-input-p state) + (go FLUSH-BITS)) + INPUT-CHECK + (when (>= input-index input-end) + (go DONE)) + DO-INPUT + (when (< pending 4) + (setf bits (ldb (byte 32 0) + (logior (ash bits 8) (aref input input-index)))) + (incf input-index) + (unless (= (incf pending) 4) + (go INPUT-CHECK))) + EXPAND-FOR-OUTPUT + (expand-for-output bits output-group) + OUTPUT-CHECK + (when (>= output-index output-end) + (go DONE)) + DO-OUTPUT + (when (> output-pending 0) + (setf (aref output output-index) + (funcall converter + (aref output-group (decf output-pending)))) + (incf output-index) + (cond + ((zerop output-pending) + (setf bits 0) + (setf pending 0) + (go INPUT-CHECK)) + (t + (go OUTPUT-CHECK)))) + DONE + (unless lastp + (go RESTORE-STATE)) + (setf (base85-encode-state-finished-input-p state) t) + ;; Make it appear as though the input were padded with zeros to a + ;; full input group. + (let ((for-pad (- 4 pending))) + (setf bits (ldb (byte 32 0) (ash bits (* 8 for-pad)))) + (setf pending 4) + (expand-for-output bits output-group)) + FLUSH-BITS + (when (zerop output-pending) + (go RESTORE-STATE)) + FLUSH-OUTPUT-CHECK + (when (>= output-index output-end) + (go RESTORE-STATE)) + DO-FLUSH-OUTPUT + (when (> output-pending 0) + (setf (aref output output-index) + (funcall converter + (aref output-group (decf output-pending)))) + (incf output-index) + (cond + ((zerop output-pending) + (setf bits 0) + (setf pending 0) + (go RESTORE-STATE)) + (t + (go FLUSH-OUTPUT-CHECK)))) + RESTORE-STATE + (setf (base85-encode-state-bits state) bits + (base85-encode-state-pending state) pending + (base85-encode-state-output-pending state) output-pending)) + (values input-index output-index)))) + +(defun octets->octets/encode/base85 (state output input + output-start output-end + input-start input-end lastp) + (declare (type simple-octet-vector output)) + (declare (optimize speed)) + (base85-encoder state output input output-start output-end + input-start input-end lastp #'char-code)) + +(defun octets->string/base85 (state output input + output-start output-end + input-start input-end lastp) + (declare (type simple-string output)) + (declare (optimize speed)) + (base85-encoder state output input output-start output-end + input-start input-end lastp #'identity)) (defvar *base85-decode-table* (make-decode-table *base85-encode-table*)) (declaim (type decode-table *base85-decode-table*)) +(defstruct (base85-decode-state + (:include decode-state) + (:copier nil) + (:predicate nil) + (:constructor %make-base85-decode-state + (&aux (descriptor (base85-format-descriptor))))) + (bits 0 :type (unsigned-byte 32)) + (pending 0 :type (integer 0 5)) + (output-pending 0 :type (integer 0 4)) + (table *base85-decode-table* :read-only t :type decode-table)) + +(defun make-base85-decode-state (case-fold map01) + (declare (ignore case-fold map01)) + (%make-base85-decode-state)) + +(defun base85-decoder (state output input + output-index output-end + input-index input-end lastp converter) + (declare (type base85-decode-state state)) + (declare (type simple-octet-vector output)) + (declare (type index output-index output-end input-index input-end)) + (declare (type function converter)) + (let ((bits (base85-decode-state-bits state)) + (pending (base85-decode-state-pending state)) + (output-pending (base85-decode-state-output-pending state)) + (table (base85-decode-state-table state))) + (declare (type (unsigned-byte 32) bits)) + (declare (type (integer 0 5) pending)) + (declare (type (integer 0 4) output-pending)) + (tagbody + FINISHED-CHECK + (when (base85-decode-state-finished-input-p state) + (go FLUSH-BITS)) + OUTPUT-AVAILABLE-CHECK + (when (zerop output-pending) + (go INPUT-AVAILABLE-CHECK)) + OUTPUT-SPACE-CHECK + (when (>= output-index output-end) + (go DONE)) + DO-OUTPUT + (setf (aref output output-index) + (ldb (byte 8 (* (decf output-pending) 8)) bits)) + (incf output-index) + (cond + ((zerop output-pending) + (setf bits 0) + (setf pending 0) + (setf output-pending 0) + (go INPUT-AVAILABLE-CHECK)) + (t + (go OUTPUT-SPACE-CHECK))) + INPUT-AVAILABLE-CHECK + (when (>= input-index input-end) + (go DONE)) + DO-INPUT + (cond + ((< pending 5) + (let* ((c (aref input input-index)) + (v (funcall converter c)) + (d (dtref table v))) + (when (= d +dt-invalid+) + (error "invalid base85 character ~A at position ~D" c input-index)) + ;; FIXME: check for overflow. + (setf bits (+ (* bits 85) d)) + (incf pending) + (incf input-index) + (go INPUT-AVAILABLE-CHECK))) + (t + (setf output-pending 4) + (go OUTPUT-SPACE-CHECK))) + DONE + (unless lastp + (go RESTORE-STATE)) + (setf (base85-decode-state-finished-input-p state) t) + ;; We should *always* have a complete group or nothing at this + ;; point. + EOT-VALIDITY-CHECK + (when (<= 1 pending 4) + (error "invalid base85 input")) + (setf output-pending (if (zerop pending) 0 4)) + FLUSH-BITS + (when (zerop output-pending) + (go RESTORE-STATE)) + FLUSH-OUTPUT-CHECK + (when (>= output-index output-end) + (go RESTORE-STATE)) + DO-FLUSH-OUTPUT + (when (> output-pending 0) + (setf (aref output output-index) + (ldb (byte 8 (* (decf output-pending) 8)) bits)) + (incf output-index) + (cond + ((zerop output-pending) + (setf bits 0) + (setf pending 0) + (setf output-pending 0) + (go RESTORE-STATE)) + (t + (go FLUSH-OUTPUT-CHECK)))) + RESTORE-STATE + (setf (base85-decode-state-bits state) bits + (base85-decode-state-pending state) pending + (base85-decode-state-output-pending state) output-pending)) + (values input-index output-index))) + +(defun string->octets/base85 (state output input + output-index output-end + input-index input-end lastp) + (declare (type simple-string input)) + (base85-decoder state output input output-index output-end + input-index input-end lastp #'char-code)) + +(defun octets->octets/decode/base85 (state output input + output-index output-end + input-index input-end lastp) + (declare (type simple-octet-vector input)) + (base85-decoder state output input output-index output-end + input-index input-end lastp #'identity)) + (defun decoded-length-base85 (length) (multiple-value-bind (n-groups rem) (truncate length 5) (unless (zerop rem) (error "base85 input length ~D must be a multiple of 5" length)) (* n-groups 4))) -(defun decode-octets-base85 (string start end length table writer) - (declare (type index start end)) - (declare (type function writer)) - (declare (type decode-table table)) - (flet ((do-decode (transform) - (loop while (< start end) - do (do ((i 0 (1+ i)) - (acc 0)) - ((>= i 5) - (incf start 5) - (unless (< acc (ash 1 32)) - (error "invalid base85 sequence")) - (let ((count (min length 4))) - (dotimes (i count) - (funcall writer (ldb (byte 8 (* (- 3 i) 8)) acc))) - (decf length count))) - (let* ((b (funcall transform (aref string (+ start i)))) - (d (dtref table b))) - (when (= d +dt-invalid+) - (error "invalid base85 character: ~X" b)) - (setf acc (+ (* acc 85) d))))))) - (declare (inline do-decode)) - (decode-dispatch string #'do-decode))) - -(defmethod decoding-tools ((format (eql :base85)) &key case-fold map01) - (declare (ignorable case-fold map01)) - (values #'decode-octets-base85 #'decoded-length-base85 - *base85-decode-table*)) +(register-descriptor-and-constructors :base85 (base85-format-descriptor) + #'make-base85-encode-state + #'make-base85-decode-state) + diff --git a/binascii.asd b/binascii.asd index 10aaf52..e39bc88 100644 --- a/binascii.asd +++ b/binascii.asd @@ -12,7 +12,9 @@ :description "A library of ASCII encoding schemes for binary data" :components ((:static-file "LICENSE") (:file "package") - (:file "octets" :depends-on ("package")) + (:file "types" :depends-on ("package")) + (:file "format" :depends-on ("types")) + (:file "octets" :depends-on ("types" "format")) (:file "ascii85" :depends-on ("octets")) (:file "base85" :depends-on ("octets")) (:file "base64" :depends-on ("octets")) diff --git a/format.lisp b/format.lisp new file mode 100644 index 0000000..86fc234 --- /dev/null +++ b/format.lisp @@ -0,0 +1,50 @@ +;;;; format.lisp -- a central repository for encoding formats and accessors + +(cl:in-package :binascii) + +(defvar *format-descriptors* (make-hash-table)) + +(defvar *format-state-constructors* (make-hash-table)) + +(defun unknown-format-error (format) + (error "Unknown format ~A" format)) + +(defun find-descriptor-for-format-or-lose (format) + (or (gethash format *format-descriptors*) + (unknown-format-error format))) + +(defun find-encode-state-constructor-or-lose (format) + (or (car (gethash format *format-state-constructors*)) + (unknown-format-error format))) + +(defun find-decode-state-constructor-or-lose (format) + (or (cdr (gethash format *format-state-constructors*)) + (unknown-format-error format))) + +(defun register-descriptor-and-constructors (format + descriptor + encoder-constructor + decoder-constructor) + (setf (gethash format *format-descriptors*) descriptor) + (setf (gethash format *format-state-constructors*) + (cons encoder-constructor decoder-constructor)) + format) + +(defun find-encoder (format) + "Return the appropriate ENCODE-STATE for FORMAT." + (etypecase format + (symbol + (let ((constructor (find-encode-state-constructor-or-lose format))) + (funcall (the function constructor)))) + (encode-state + format))) + +(defun find-decoder (format case-fold map01) + "Return the appropriate DECODE-STATE for FORMAT. If FORMAT is a symbol, +use CASE-FOLD and MAP01 to parameterize the returned decoder." + (etypecase format + (symbol + (let ((constructor (find-decode-state-constructor-or-lose format))) + (funcall (the function constructor) case-fold map01))) + (decode-state + format))) diff --git a/octets.lisp b/octets.lisp index cae7171..948af3f 100644 --- a/octets.lisp +++ b/octets.lisp @@ -2,25 +2,6 @@ (cl:in-package :binascii) -(deftype index () '(mod #.array-dimension-limit)) - -(defgeneric encoding-tools (format) - (:documentation "Return three values: the basic encoding function for -FORMAT, an encoded-length function for FORMAT, and an encoding table for -FORMAT. The encoding table specifies ASCII characters for encoded -values and is typically a SIMPLE-BASE-STRING.")) - -(defgeneric decoding-tools (format &key case-fold map01) - (:documentation "Return three values: the basic decoding function for -FORMAT, a decoded-length function for FORMAT, and the decoding table for -FORMAT. CASE-FOLD is a generalized boolean indicating whether to -compare characters case-insensitively. MAP01 should be either NIL, -#\\I, or #\\L; if MAP01 is not NIL, then its value indicates what -character #\\1 maps to. If MAP01 is not NIL, then \\#0 maps to O. - -CASE-FOLD and MAP01 are silently ignored if they do not apply to -FORMAT.")) - (defun case-fold-decode-table (decode-table encode-table) (loop with table = (copy-seq decode-table) for c across encode-table @@ -45,113 +26,146 @@ FORMAT.")) (when errorp (error "Unsupported element-type ~A" element-type))))) -(defun determine-encoding-writer (destination length element-type) - (etypecase destination - (null - (flet ((do-encode (etype transform) - (let ((v (make-array (the fixnum length) :element-type etype)) - (i -1)) - (values #'(lambda (c) (setf (aref v (incf i)) - (funcall transform c))) - v)))) - (declare (inline do-encode)) - (cond - ((eq element-type 'character) - (do-encode 'character #'identity)) - ((eq element-type 'base-char) - (do-encode 'base-char #'identity)) - (t - (do-encode '(unsigned-byte 8) #'char-code))))) - (stream - (cond - ((or (eq element-type 'character) (eq element-type 'base-char)) - (values #'(lambda (c) (write-char c destination)) nil)) - (t - (values #'(lambda (c) (write-byte (char-code c) destination)) nil)))) - (string - (unless (or (eq element-type 'character) (eq element-type 'base-char)) - (error "Cannot output to a string with ~A :ELEMENT-TYPE" element-type)) - (values #'(lambda (c) (vector-push-extend c destination)) nil)) - ((array (unsigned-byte 8) (*)) - (unless (eq element-type 'octet) - (error "Cannot output to an octet vector with ~A :ELEMENT-TYPE" element-type)) - (values #'(lambda (c) (vector-push-extend (char-code c) destination)) nil)))) - -(defun decode-octets* (destination string decode-fun length-fun decode-table - start end decoded-length) - (declare (type function decode-fun length-fun)) - (let* ((end (or end (length string))) - ;; For better or worse, a provided decoded length from the user - ;; always wins. But LENGTH-FUN may do some additional - ;; validation of its own, so we want to make sure to call it - ;; too (the additional cost of doing so is negligible). - (guessed-length (funcall length-fun (- end start))) - (length (or decoded-length guessed-length))) - (declare (type index length)) - (etypecase destination - (null - (let* ((octets (make-array length :element-type '(unsigned-byte 8))) - (i -1) - (actual-length 0)) - (declare (type index actual-length)) - (funcall decode-fun string start end length decode-table - #'(lambda (o) - (setf (aref octets (incf i)) o) - (incf actual-length))) - (if (= actual-length length) - octets - ;; FIXME: if we wanted to dig into SBCL internals, there's - ;; a less consy way: (SB-KERNEL:%SHRINK-VECTOR). - (subseq octets 0 actual-length)))) - (stream - (funcall decode-fun string start end length decode-table - #'(lambda (o) (write-byte o destination))) - nil) - ((array (unsigned-byte 8) (*)) - (funcall decode-fun string start end length decode-table - #'(lambda (o) (vector-push-extend o destination))) - nil)))) - -(defun encode-octets (destination octets format - &key (start 0) end (element-type 'base-char) - &allow-other-keys) +(declaim (inline array-data-and-offsets)) +(defun array-data-and-offsets (v start end) + "Like ARRAY-DISPLACEMENT, only more useful." + #+sbcl + (sb-kernel:with-array-data ((v v) (start start) (end end)) + (values v start end)) + #+cmu + (lisp::with-array-data ((v v) (start start) (end end)) + (values v start end)) + #-(or sbcl cmu) + (values v start (or end (length v)))) + +(defun encode-to-fresh-vector (octets state start end element-type) + (declare (type encode-state state)) + (multiple-value-bind (input start end) + (array-data-and-offsets octets start end) + (let* ((fd (state-descriptor state)) + (length (funcall (fd-encoded-length fd) (- end start)))) + (declare (type format-descriptor fd)) + (flet ((frob (etype encode-fun) + (let ((v (make-array length :element-type etype))) + (multiple-value-bind (input-index output-index) + (funcall encode-fun state v input + 0 length start end t) + (declare (ignore input-index)) + (if (= output-index length) + v + (subseq v 0 output-index)))))) + (declare (inline frob)) + (ecase (canonicalize-element-type element-type) + (character + (frob 'character (fd-octets->string fd))) + (base-char + (frob 'base-char (fd-octets->string fd))) + (octet + (frob '(unsigned-byte 8) (fd-octets->octets/encode fd)))))))) + +(defun encode (octets format &key (start 0) end (element-type 'base-char)) + (encode-to-fresh-vector octets (find-encoder format) start end element-type)) + +(defun encode-octets (destination octets format &key (start 0) end + (output-start 0) output-end (element-type 'base-char) + finishp) "Encode OCTETS between START and END into ASCII characters -according to FORMAT and written to DESTINATION according to ELEMENT-TYPE. +according to FORMAT and write them to DESTINATION according to ELEMENT-TYPE. If DESTINATION is NIL and ELEMENT-TYPE is a subtype of CHARACTER, then a -string is returned. If DESTINATION is NIL and ELEMENT-TYPE is +string is returned. If DESTINATION is NIL and ELEMENT-TYPE is \(UNSIGNED-BYTE 8) or an equivalent type, then an octet vector is returned. -If DESTINATION is a STREAM, then the result is written to DESTINATION -using WRITE-CHAR or WRITE-BYTE as chosen by ELEMENT-TYPE. - If ELEMENT-TYPE is a subtype of CHARACTER, then DESTINATION may also be -a string with a fill pointer. The result is written to the string as if -by use of VECTOR-PUSH-EXTEND. Similarly, if ELEMENT-TYPE -is (UNSIGNED-BYTE 8) or an equivalent type, then DESTINATION may be an -octet vector with a fill pointer." - (multiple-value-bind (encode-fun length-fun table) (encoding-tools format) - (let* ((end (or end (length octets))) - (length (- end start)) - (canonical-element-type (canonicalize-element-type element-type))) - (multiple-value-bind (writer return-value) - (determine-encoding-writer destination (funcall length-fun length) - canonical-element-type) - (funcall encode-fun octets start end table writer) - return-value)))) - -(defun decode-octets (destination string format - &key (start 0) end decoded-length case-fold map01 - &allow-other-keys) +a string. Similarly, if ELEMENT-TYPE is (UNSIGNED-BYTE 8) or an +equivalent type, then DESTINATION may be an octet vector. In this case, +OUTPUT-START and OUTPUT-END are used to determine the portion of +DESTINATION where the encoded output may be placed. + +If DESTINATION is not NIL, The index of the first input element that was +not read and the index of the first output element that was not updated +are returned as multiple values. respectively, written are returned as +multiple values. ELEMENT-TYPE is ignored. + +If FINISHP is true, then in addition to any encoding of OCTETS, also output +any necessary padding required by FORMAT." + (let* ((state (find-encoder format)) + (fd (state-descriptor state))) + (declare (type encode-state state)) + (declare (type format-descriptor fd)) + (flet ((frob (encode-fun) + (multiple-value-bind (input input-start input-end) + (array-data-and-offsets octets start end) + (multiple-value-bind (output output-start output-end) + (array-data-and-offsets destination output-start output-end) + (funcall encode-fun state + output input + output-start output-end + input-start input-end finishp))))) + (declare (inline frob)) + (etypecase destination + (null + (encode-to-fresh-vector octets state start end element-type)) + (string + (frob (fd-octets->string fd))) + ((array (unsigned-byte 8) (*)) + (frob (fd-octets->octets/encode fd))))))) + +(defun decode-to-fresh-vector (string state start end decoded-length) + (declare (type decode-state state)) + (multiple-value-bind (input start end) + (array-data-and-offsets string start end) + (let* ((fd (state-descriptor state)) + (length (or decoded-length + (funcall (fd-decoded-length fd) (- end start))))) + (declare (type format-descriptor fd)) + (flet ((frob (v decode-fun) + (multiple-value-bind (input-index output-index) + (funcall decode-fun state v input 0 length start end t) + ;; FIXME: we should check to see if we actually + ;; consumed all the input. If we didn't, then we need + ;; to reallocate V and continue decoding. Even though + ;; we said LASTP=T. Hmmm. + (declare (ignore input-index)) + (if (= output-index length) + v + (subseq v 0 output-index))))) + (let ((octets (make-array length :element-type '(unsigned-byte 8)))) + (etypecase string + (simple-string + (frob octets (fd-string->octets fd))) + (simple-octet-vector + (frob octets (fd-octets->octets/decode fd))))))))) + +(defun decode (string format &key (start 0) end case-fold map01 decoded-length) + (decode-to-fresh-vector string (find-decoder format case-fold map01) + start end decoded-length)) + +(defun decode-octets (destination string format &key (start 0) end + (output-start 0) output-end case-fold map01 finishp + decoded-length) "Decode the characters of STRING between START and END into octets -according to FORMAT. DECODED-LENGTH indictes the number of decoded octets -to expect. DESTINATION may be NIL, an octet vector with a fill-pointer, -or a stream. DECODED-LENGTH does not need to be provided for all formats." - (declare (ignorable decoded-length)) - (multiple-value-bind (decode length table) - (decoding-tools format :case-fold case-fold :map01 map01) - (decode-octets* destination string decode length table - start end decoded-length))) +according to FORMAT. DECODED-LENGTH indicates the number of decoded +octets to expect. DESTINATION may be NIL." + (let ((state (find-decoder format case-fold map01))) + (declare (type decode-state state)) + (flet ((frob (decode-fun) + (multiple-value-bind (input input-start input-end) + (array-data-and-offsets string start end) + (multiple-value-bind (output output-start output-end) + (array-data-and-offsets destination output-start output-end) + (funcall decode-fun state + output input + output-start output-end + input-start input-end finishp))))) + (declare (inline frob)) + (etypecase string + (null + (decode-to-fresh-vector string state start end decoded-length)) + (string + (frob (fd-string->octets (state-descriptor state)))) + ((array (unsigned-byte 8) (*)) + (frob (fd-octets->octets/decode (state-descriptor state)))))))) (defconstant +dt-invalid+ -1) @@ -173,35 +187,3 @@ or a stream. DECODED-LENGTH does not need to be provided for all formats." (if (>= i 256) +dt-invalid+ (aref table i))) - -;;; By controlling the inlinability of this function, we can specialize -;;; decode functions on simple arrays while going through full calls for -;;; non-simple arrays, thus providing a balance of -;;; performance/functionality/code size. -;;; -;;; Yes, it's a little ugly, but it's because INLINE/NOTINLINE doesn't -;;; work quite right on local functions in some implementations. -;;; Assuming the implementation supports INLINE/NOTINLINE correctly, -;;; this way should work. -(declaim (inline decode-dispatch-wrap)) -(defun decode-dispatch-wrap (decode-fun transform) - (funcall decode-fun transform)) -(declaim (notinline decode-dispatch-wrap)) - -(declaim (inline decode-dispatch)) -(defun decode-dispatch (v decode-fun) - (etypecase v - ;; Probably not worth optimizing for BASE-CHAR vs. CHARACTER. - (simple-string - (locally (declare (inline decode-dispatch-wrap)) - (decode-dispatch-wrap decode-fun #'char-code))) - ((simple-array (unsigned-byte 8) (*)) - (locally (declare (inline decode-dispatch-wrap)) - (decode-dispatch-wrap decode-fun #'identity))) - ;; We're not particularly worried about speed in these two cases. - (string - (locally (declare (notinline decode-dispatch-wrap)) - (decode-dispatch-wrap decode-fun #'char-code))) - ((array (unsigned-byte 8) (*)) - (locally (declare (notinline decode-dispatch-wrap)) - (decode-dispatch-wrap decode-fun #'identity))))) diff --git a/package.lisp b/package.lisp index fc2b9ea..d1a5c19 100644 --- a/package.lisp +++ b/package.lisp @@ -2,4 +2,18 @@ (cl:defpackage :binascii (:use :cl) - (:export #:encode-octets #:decode-octets)) + (:shadow simple-string) + (:export + #:encode-octets #:encode + #:decode-octets #:decode + + ;; Format names. + #:base16 + #:hex + #:base32 + #:base32hex + #:base64 + #:base64url + #:base85 + #:ascii85 + )) diff --git a/tests/tests.lisp b/tests/tests.lisp index 44f5c21..8792145 100644 --- a/tests/tests.lisp +++ b/tests/tests.lisp @@ -44,11 +44,26 @@ STRING contains any character whose CHAR-CODE is greater than 255." (apply test-function name (cdr form))))) finally (return t))))) +(defvar *coding-output-element-type* 'base-char) + (defun encoding-test* (name input encoded-output decoded-length) - (let ((output (binascii:encode-octets nil input name :end decoded-length)) - (decoded-input (binascii:decode-octets nil encoded-output name - :decoded-length decoded-length))) - (when (mismatch output encoded-output) + (let* ((output (binascii:encode input name :end decoded-length + :element-type *coding-output-element-type*)) + (mismatchable-encoded-output + (cond + ((or (eql *coding-output-element-type* 'base-char) + (eql *coding-output-element-type* 'character)) + encoded-output) + ((equal *coding-output-element-type* '(unsigned-byte 8)) + (ascii-string-to-octets encoded-output)) + (t + (error "unknown value for *CODING-OUTPUT-ELEMENT-TYPE* ~A" + *coding-output-element-type*)))) + (decoded-input (binascii:decode mismatchable-encoded-output name + :decoded-length decoded-length))) + (unless (typep output `(array ,*coding-output-element-type* (*))) + (error "encoded output not of proper type")) + (when (mismatch output mismatchable-encoded-output) (error "encoding ~A failed on ~A, produced ~A, wanted ~A" name input output encoded-output)) (when (mismatch input decoded-input :end1 decoded-length :end2 decoded-length) @@ -56,7 +71,7 @@ STRING contains any character whose CHAR-CODE is greater than 255." name encoded-output decoded-input input)))) (defun encoding-test (name hexinput encoded-output &optional decoded-length) - (encoding-test* name (binascii:decode-octets nil hexinput :hex) + (encoding-test* name (binascii:decode hexinput :hex) encoded-output decoded-length)) (defun encoding-test-ascii (name ascii-input encoded-output @@ -69,10 +84,17 @@ STRING contains any character whose CHAR-CODE is greater than 255." (cons :encoding-ascii-test 'encoding-test-ascii))) (eval-when (:compile-toplevel :load-toplevel :execute) - (defparameter *encodings* '(:base16 :base32 :base32hex :base64 :base85))) + (defparameter *encodings* '(:base16 :base32 :base32hex :base64 :base85 :ascii85))) -#.(loop for e in *encodings* - collect `(rtest:deftest ,(intern (format nil "~A/TO-NIL/BASE-CHAR" e)) - (run-test-vector-file ,e *encoding-tests*) - t) into forms - finally (return `(progn ,@forms))) +#.(flet ((deftest-form (e eltype) + (let ((pretty-name (if (equal eltype '(unsigned-byte 8)) + 'ub8 + eltype))) + `(rtest:deftest ,(intern (format nil "~A/TO-NIL/~A" e pretty-name)) + (let ((*coding-output-element-type* ',eltype)) + (run-test-vector-file ,e *encoding-tests*)) + t)))) + (loop for e in *encodings* + append (loop for eltype in '(base-char character (unsigned-byte 8)) + collect (deftest-form e eltype)) into forms + finally (return `(progn ,@forms)))) diff --git a/types.lisp b/types.lisp new file mode 100644 index 0000000..00765e1 --- /dev/null +++ b/types.lisp @@ -0,0 +1,61 @@ +;;;; types.lisp -- various useful types + +(cl:in-package :binascii) + +(deftype index () '(mod #.array-dimension-limit)) + +(deftype simple-octet-vector (&optional (length '*)) + #+(or sbcl cmu) `(simple-array (unsigned-byte 8) (,length)) + #-(or sbcl cmu) `(array (unsigned-byte 8) (,length))) + +(deftype simple-string () + #+sbcl '(and cl:simple-string (not (simple-array nil (*)))) + #+cmu cl:simple-string + #-(or sbcl cmu) cl:string) + +(defun required-argument () + (error "Required argument not provided")) + +(defstruct (format-descriptor + (:conc-name fd-) + (:copier nil) + (:constructor make-format-descriptor + (encoded-length octets->string + octets->octets/encode + decoded-length + string->octets + octets->octets/decode))) + (encoded-length (required-argument) :type function :read-only t) + (octets->string (required-argument) :type function :read-only t) + (octets->octets/encode (required-argument) :type function :read-only t) + (decoded-length (required-argument) :type function :read-only t) + (string->octets (required-argument) :type function :read-only t) + (octets->octets/decode (required-argument) :type function :read-only t)) + +(defstruct (state + (:copier nil) + (:predicate nil) + (:constructor nil)) + (descriptor (required-argument) :type format-descriptor :read-only t) + ;; FINISHED-INPUT-P is either T or NIL depending on whether we have + ;; seen all of the input. + (finished-input-p nil) + ;; Likewise for FINISHED-OUTPUT-P. + (finished-output-p nil)) + +(defstruct (encode-state + (:include state) + (:copier nil) + (:predicate nil) + (:constructor nil)) + ;; LINE-BREAK describes after how many characters we should be + ;; inserting newlines into the encoded output. It is zero if we + ;; should never insert newlines. + (line-break 0 :type (integer 0 *))) + +(defstruct (decode-state + (:include state) + (:copier nil) + (:predicate nil) + (:constructor nil)) + )