From: Christophe R. <cr...@us...> - 2004-09-15 21:31:37
|
Update of /cvsroot/sbcl/sbcl/src/code In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv15494/src/code Modified Files: Tag: character_branch fd-stream.lisp Log Message: 0.8.13.77.character.17: "We had to make our own entertainment" Commit (untested) utf8 external format for fd-streams. Index: fd-stream.lisp =================================================================== RCS file: /cvsroot/sbcl/sbcl/src/code/fd-stream.lisp,v retrieving revision 1.46.4.3 retrieving revision 1.46.4.4 diff -u -d -r1.46.4.3 -r1.46.4.4 --- fd-stream.lisp 7 Sep 2004 19:47:34 -0000 1.46.4.3 +++ fd-stream.lisp 15 Sep 2004 21:31:28 -0000 1.46.4.4 @@ -86,7 +86,8 @@ ;; timeout specified for this stream, or NIL if none (timeout nil :type (or index null)) ;; pathname of the file this stream is opened to (returned by PATHNAME) - (pathname nil :type (or pathname null))) + (pathname nil :type (or pathname null)) + (external-format :default)) (def!method print-object ((fd-stream file-stream) stream) (declare (type stream stream)) (print-unreadable-object (fd-stream stream :type t :identity t) @@ -197,6 +198,33 @@ (frob-output stream (fd-stream-obuf-sap stream) 0 length t) (setf (fd-stream-obuf-tail stream) 0)))) +(defmacro output-wrapper/variable-width ((stream size buffering) + &body body) + (let ((stream-var (gensym)) + (size-var (gensym))) + `(let ((,stream-var ,stream) + (,size-var ,size)) + ,(unless (eq (car buffering) :none) + `(when (< (fd-stream-obuf-length ,stream-var) + (+ (fd-stream-obuf-tail ,stream-var) + ,size-var)) + (flush-output-buffer ,stream-var))) + ,(unless (eq (car buffering) :none) + `(when (> (fd-stream-ibuf-tail ,stream-var) + (fd-stream-ibuf-head ,stream-var)) + (file-position ,stream-var (file-position ,stream-var)))) + + ,@body + (incf (fd-stream-obuf-tail ,stream-var) ,size-var) + ,(ecase (car buffering) + (:none + `(flush-output-buffer ,stream-var)) + (:line + `(when (eq (char-code byte) (char-code #\Newline)) + (flush-output-buffer ,stream-var))) + (:full)) + (values)))) + (defmacro output-wrapper ((stream size buffering) &body body) (let ((stream-var (gensym))) `(let ((,stream-var ,stream)) @@ -221,6 +249,32 @@ (:full)) (values)))) +(defmacro def-output-routines/variable-width ((name-fmt size external-format + &rest bufferings) + &body body) + (declare (optimize (speed 1))) + (cons 'progn + (mapcar + (lambda (buffering) + (let ((function + (intern (let ((*print-case* :upcase)) + (format nil name-fmt (car buffering)))))) + `(progn + (defun ,function (stream byte) + (output-wrapper/variable-width (stream ,size ,buffering) + ,@body)) + (setf *output-routines* + (nconc *output-routines* + ',(mapcar + (lambda (type) + (list type + (car buffering) + function + 1 + external-format)) + (cdr buffering))))))) + bufferings))) + ;;; Define output routines that output numbers SIZE bytes long for the ;;; given bufferings. Use BODY to do the actual output. (defmacro def-output-routines ((name-fmt size &rest bufferings) &body body) @@ -242,10 +296,44 @@ (list type (car buffering) function - size)) + size + nil)) (cdr buffering))))))) bufferings))) +(def-output-routines/variable-width ("OUTPUT-CHAR-UTF8-~A-BUFFERED" + (let ((bits (char-code byte))) + (cond + ((< bits #x80) 1) + ((< bits #x800) 2) + ((< bits #x10000) 3) + (t 4))) + :utf-8 + (:none character) + (:line character) + (:full character)) + (if (char= byte #\Newline) + (setf (fd-stream-char-pos stream) 0) + (incf (fd-stream-char-pos stream))) + (let ((bits (char-code byte)) + (sap (fd-stream-obuf-sap stream)) + (tail (fd-stream-obuf-tail stream))) + (cond + ((< bits #x80) + (setf (sap-ref-8 sap tail) bits)) + ((< bits #x800) + (setf (sap-ref-8 sap tail) (logior #xc0 (ldb (byte 5 6) bits)) + (sap-ref-8 sap (1+ tail)) (logior #x80 (ldb (byte 6 0) bits)))) + ((< bits #x10000) + (setf (sap-ref-8 sap tail) (logior #xe0 (ldb (byte 4 12) bits)) + (sap-ref-8 sap (1+ tail)) (logior #x80 (ldb (byte 6 6) bits)) + (sap-ref-8 sap (+ 2 tail)) (logior #x80 (ldb (byte 6 0) bits)))) + (t + (setf (sap-ref-8 sap tail) (logior #xf0 (ldb (byte 3 18) bits)) + (sap-ref-8 sap (1+ tail)) (logior #x80 (ldb (byte 6 12) bits)) + (sap-ref-8 sap (+ 2 tail)) (logior #x80 (ldb (byte 6 6) bits)) + (sap-ref-8 sap (+ 3 tail)) (logior #x80 (ldb (byte 6 0) bits))))))) + (def-output-routines ("OUTPUT-CHAR-~A-BUFFERED" 1 (:none character) @@ -302,6 +390,62 @@ (fd-stream-obuf-tail stream)) byte)) +(defun output-utf8-bytes (fd-stream string flush-p &optional start end) + #!+sb-doc + "Output STRING to FD-STREAM. Handle external-format." + (let ((start (or start 0)) + (end (or end (length string)))) + (declare (type index start end)) + (when (> (fd-stream-ibuf-tail fd-stream) + (fd-stream-ibuf-head fd-stream)) + (file-position fd-stream (file-position fd-stream))) + (when (< end start) + (error ":END before :START!")) + (do () + ((= end start)) + (setf (fd-stream-obuf-tail fd-stream) + (do* ((len (fd-stream-obuf-length fd-stream)) + (sap (fd-stream-obuf-sap fd-stream)) + (tail (fd-stream-obuf-tail fd-stream)) + (space (- len tail))) + ((or (= start end) (< space 4)) tail) + (let ((bits (char-code (aref string start)))) + (incf tail + (cond + ((< bits #x80) + (setf (sap-ref-8 sap tail) bits) + 1) + ((< bits #x800) + (setf (sap-ref-8 sap tail) + (logior #xc0 (ldb (byte 5 6) bits)) + (sap-ref-8 sap (1+ tail)) + (logior #x80 (ldb (byte 6 0) bits))) + 2) + ((< bits #x10000) + (setf (sap-ref-8 sap tail) + (logior #xe0 (ldb (byte 4 12) bits)) + (sap-ref-8 sap (1+ tail)) + (logior #x80 (ldb (byte 6 6) bits)) + (sap-ref-8 sap (+ 2 tail)) + (logior #x80 (ldb (byte 6 0) bits))) + 3) + (t + (setf (sap-ref-8 sap tail) + (logior #xf0 (ldb (byte 3 18) bits)) + (sap-ref-8 sap (1+ tail)) + (logior #x80 (ldb (byte 6 12) bits)) + (sap-ref-8 sap (+ 2 tail)) + (logior #x80 (ldb (byte 6 6) bits)) + (sap-ref-8 sap (+ 3 tail)) + (logior #x80 (ldb (byte 6 0) bits))) + 4))) + (incf start)))) + (when (< start end) + (flush-output-buffer fd-stream) + (frob-output fd-stream string start end nil))) + (when flush-p + (flush-output-buffer fd-stream)))) + ;;; Do the actual output. If there is space to buffer the string, ;;; buffer it. If the string would normally fit in the buffer, but ;;; doesn't because of other stuff in the buffer, flush the old noise @@ -389,15 +533,23 @@ :from-end t :start start :end end)))) - (ecase (fd-stream-buffering stream) - (:full - (output-raw-bytes stream thing start end)) - (:line - (output-raw-bytes stream thing start end) - (when last-newline - (flush-output-buffer stream))) - (:none - (frob-output stream thing start end nil))) + (if (eq (fd-stream-external-format stream) :utf-8) + (ecase (fd-stream-buffering stream) + (:full + (output-utf8-bytes stream thing nil start end)) + (:line + (output-utf8-bytes stream thing last-newline start end)) + (:none + (output-utf8-bytes stream thing t start end))) + (ecase (fd-stream-buffering stream) + (:full + (output-raw-bytes stream thing start end)) + (:line + (output-raw-bytes stream thing start end) + (when last-newline + (flush-output-buffer stream))) + (:none + (frob-output stream thing start end nil)))) (if last-newline (setf (fd-stream-char-pos stream) (- end last-newline 1)) @@ -412,14 +564,16 @@ ;;; Find an output routine to use given the type and buffering. Return ;;; as multiple values the routine, the real type transfered, and the ;;; number of bytes per element. -(defun pick-output-routine (type buffering) +(defun pick-output-routine (type buffering &optional external-format) (dolist (entry *output-routines*) - (when (and (subtypep type (car entry)) - (eq buffering (cadr entry))) + (when (and (subtypep type (first entry)) + (eq buffering (second entry)) + (or (not (fifth entry)) + (eq external-format (fifth entry)))) (return-from pick-output-routine - (values (symbol-function (caddr entry)) - (car entry) - (cadddr entry))))) + (values (symbol-function (third entry)) + (first entry) + (fourth entry))))) ;; KLUDGE: dealing with the buffering here leads to excessive code ;; explosion. ;; @@ -561,6 +715,32 @@ (return)) (frob-input ,stream-var))))) +(defmacro input-wrapper/variable-width ((stream bytes eof-error eof-value) + &body read-forms) + (let ((stream-var (gensym)) + (element-var (gensym)) + (bytes-var (gensym))) + `(let ((,stream-var ,stream) + (,bytes-var nil)) + (if (fd-stream-unread ,stream-var) + (prog1 + (fd-stream-unread ,stream-var) + (setf (fd-stream-unread ,stream-var) nil) + (setf (fd-stream-listen ,stream-var) nil)) + (let ((,element-var + (catch 'eof-input-catcher + (input-at-least ,stream-var 1) + (let ((byte (sap-ref-8 (fd-stream-ibuf-sap ,stream-var) + (fd-stream-ibuf-head ,stream-var)))) + (setq ,bytes-var ,bytes) + (input-at-least ,stream-var ,bytes-var)) + (locally ,@read-forms)))) + (cond (,element-var + (incf (fd-stream-ibuf-head ,stream-var) ,bytes-var) + ,element-var) + (t + (eof-or-lose ,stream-var ,eof-error ,eof-value)))))))) + ;;; a macro to wrap around all input routines to handle EOF-ERROR noise (defmacro input-wrapper ((stream bytes eof-error eof-value) &body read-forms) (let ((stream-var (gensym)) @@ -581,6 +761,19 @@ (t (eof-or-lose ,stream-var ,eof-error ,eof-value)))))))) +(defmacro def-input-routine/variable-width (name + (type external-format size sap head) + &rest body) + `(progn + (defun ,name (stream eof-error eof-value) + (input-wrapper/variable-width (stream ,size eof-error eof-value) + (let ((,sap (fd-stream-ibuf-sap stream)) + (,head (fd-stream-ibuf-head stream))) + ,@body))) + (setf *input-routines* + (nconc *input-routines* + (list (list ',type ',name 1 ',external-format)))))) + (defmacro def-input-routine (name (type size sap head) &rest body) @@ -592,7 +785,30 @@ ,@body))) (setf *input-routines* (nconc *input-routines* - (list (list ',type ',name ',size)))))) + (list (list ',type ',name ',size nil)))))) + +(def-input-routine/variable-width input-utf8-character + (character :utf-8 + (cond + ((< byte #x80) 1) + ((< byte #xe0) 2) + ((< byte #xf0) 3) + (t 4)) + sap head) + (let ((byte (sap-ref-8 sap head))) + (code-char (cond + ((< byte #x80) byte) + ((< byte #xe0) + (dpb byte (byte 5 6) (sap-ref-8 sap (1+ head)))) + ((< byte #xf0) + (dpb byte (byte 4 12) + (dpb (sap-ref-8 sap (1+ head)) (byte 6 6) + (sap-ref-8 (+ 2 head))))) + (t + (dpb byte (byte 3 18) + (dpb (sap-ref-8 sap (1+ head)) (byte 6 12) + (dpb (sap-ref-8 sap (+ 2 head)) (byte 6 6) + (sap-ref-8 (+ 3 head)))))))))) ;;; STREAM-IN routine for reading a string char (def-input-routine input-character @@ -632,13 +848,15 @@ ;;; Find an input routine to use given the type. Return as multiple ;;; values the routine, the real type transfered, and the number of ;;; bytes per element. -(defun pick-input-routine (type) +(defun pick-input-routine (type &optional external-format) (dolist (entry *input-routines*) - (when (subtypep type (car entry)) + (when (and (subtypep type (first entry)) + (or (not (fourth entry)) + (eq external-format (fourth entry)))) (return-from pick-input-routine - (values (symbol-function (cadr entry)) - (car entry) - (caddr entry))))) + (values (symbol-function (second entry)) + (first entry) + (third entry))))) ;; FIXME: let's do it the hard way, then (but ignore things like ;; endianness, efficiency, and the necessary coupling between these ;; and the output routines). -- CSR, 2004-02-09 @@ -783,9 +1001,15 @@ (push (fd-stream-ibuf-sap fd-stream) *available-buffers*) (setf (fd-stream-ibuf-sap fd-stream) nil)) + ;; This isn't strictly necessary, but it'll make STREAM-EXTERNAL-FORMAT + ;; return a nice value on binary streams. + (unless (subtypep target-type 'character) + (setf (fd-stream-external-format fd-stream) :default)) + (when input-p (multiple-value-bind (routine type size) - (pick-input-routine target-type) + (pick-input-routine target-type + (fd-stream-external-format fd-stream)) (unless routine (error "could not find any input routine for ~S" target-type)) (setf (fd-stream-ibuf-sap fd-stream) (next-available-buffer)) @@ -815,7 +1039,9 @@ (when output-p (multiple-value-bind (routine type size) - (pick-output-routine target-type (fd-stream-buffering fd-stream)) + (pick-output-routine target-type + (fd-stream-buffering fd-stream) + (fd-stream-external-format fd-stream)) (unless routine (error "could not find any output routine for ~S buffered ~S" (fd-stream-buffering fd-stream) @@ -1113,6 +1339,7 @@ (output nil output-p) (element-type 'base-char) (buffering :full) + (external-format :default) timeout file original @@ -1136,6 +1363,7 @@ :delete-original delete-original :pathname pathname :buffering buffering + :external-format external-format :timeout timeout))) (set-fd-stream-routines stream element-type input output input-buffer-p) (when (and auto-close (fboundp 'finalize)) @@ -1199,8 +1427,6 @@ :IF-DOES-NOT-EXIST - one of :ERROR, :CREATE or NIL See the manual for details." - (declare (ignore external-format)) ; FIXME: CHECK-TYPE? WARN-if-not? - ;; Calculate useful stuff. (multiple-value-bind (input output mask) (case direction @@ -1325,6 +1551,7 @@ :input input :output output :element-type element-type + :external-format external-format :file namestring :original original :delete-original delete-original @@ -1440,7 +1667,9 @@ (string (length object)))) (defun stream-external-format (stream) - (declare (type file-stream stream) (ignore stream)) + (declare (type file-stream stream)) #!+sb-doc - "Return :DEFAULT." - :default) + "Return the actual external format for file-streams, otherwise :DEFAULT." + (if (typep stream 'file-stream) + (fd-stream-external-format stream) + :default)) |