Update of /cvsroot/sbcl/sbcl/src/code/external-formats
In directory fdv4jf1.ch3.sourceforge.com:/tmp/cvs-serv6056/src/code/external-formats
Modified Files:
enc-cyr.lisp enc-dos.lisp enc-ebcdic.lisp enc-iso.lisp
enc-win.lisp mb-util.lisp ucs-2.lisp
Added Files:
enc-basic.lisp
Log Message:
1.0.31.23: OAOOize external-format support
fd-streams and octets support independently kept records of
external-format->function maps, suitable for the purposes of each. This
revision stores all the relevant information for an external format in a
single place--a new EXTERNAL-FORMAT structure--and has both clients
reference things in that single place.
Doing so offers opportunities for other cleanups and speedups.
fd-streams external-format storage was an alist of lists, which was bad
for client code, since everything referred to fields with NTH or
SECOND/FOURTH/FIFTH. A proper DEFSTRUCT helps here and should be
slightly more space-efficient, as we're replacing a list with
(effectively) a vector. Also, since clients had to scan through an
alist to find an external-format, this design was hurting performance in
streams code, most notably OPEN. Replacing the alist with a hash table
(which the octets code was already using) should make things a lot
snappier.
--- NEW FILE: enc-basic.lisp ---
;;;; encodings available regardless of build-time unicode settings
;;;; This software is part of the SBCL system. See the README file for
;;;; more information.
;;;;
;;;; This software is derived from the CMU CL system, which was
;;;; written at Carnegie Mellon University and released into the
;;;; public domain. The software is in the public domain and is
;;;; provided with absolutely no warranty. See the COPYING and CREDITS
;;;; files for more information.
(in-package "SB!IMPL")
;;; ASCII
(declaim (inline code->ascii-mapper))
(defun code->ascii-mapper (code)
(declare (optimize speed (safety 0))
(type char-code code))
(if (> code 127)
nil
code))
(declaim (inline get-ascii-bytes))
(defun get-ascii-bytes (string pos)
(declare (optimize speed (safety 0))
(type simple-string string)
(type array-range pos))
(get-latin-bytes #'code->ascii-mapper :ascii string pos))
(defun string->ascii (string sstart send null-padding)
(declare (optimize speed (safety 0))
(type simple-string string)
(type array-range sstart send))
(values (string->latin% string sstart send #'get-ascii-bytes null-padding)))
(defmacro define-ascii->string (accessor type)
(let ((name (make-od-name 'ascii->string accessor)))
`(progn
(defun ,name (array astart aend)
(declare (optimize speed)
(type ,type array)
(type array-range astart aend))
;; Since there is such a thing as a malformed ascii byte, a
;; simple "make the string, fill it in" won't do.
(let ((string (make-array 0 :element-type 'character :fill-pointer 0 :adjustable t)))
(loop for apos from astart below aend
do (let* ((code (,accessor array apos))
(string-content
(if (< code 128)
(code-char code)
(decoding-error array apos (1+ apos) :ascii
'malformed-ascii apos))))
(if (characterp string-content)
(vector-push-extend string-content string)
(loop for c across string-content
do (vector-push-extend c string))))
finally (return (coerce string 'simple-string))))))))
(instantiate-octets-definition define-ascii->string)
(define-external-format (:ascii :us-ascii :ansi_x3.4-1968
:iso-646 :iso-646-us :|646|)
1 t
(if (>= bits 128)
(external-format-encoding-error stream bits)
(setf (sap-ref-8 sap tail) bits))
(code-char byte)
ascii->string-aref
string->ascii)
;;; Latin-1
(declaim (inline get-latin1-bytes))
(defun get-latin1-bytes (string pos)
(declare (optimize speed (safety 0))
(type simple-string string)
(type array-range pos))
(get-latin-bytes #'identity :latin-1 string pos))
(defun string->latin1 (string sstart send null-padding)
(declare (optimize speed (safety 0))
(type simple-string string)
(type array-range sstart send))
(values (string->latin% string sstart send #'get-latin1-bytes null-padding)))
(defmacro define-latin1->string* (accessor type)
(declare (ignore type))
(let ((name (make-od-name 'latin1->string* accessor)))
`(progn
(defun ,name (string sstart send array astart aend)
(,(make-od-name 'latin->string* accessor) string sstart send array astart aend #'identity)))))
(instantiate-octets-definition define-latin1->string*)
(defmacro define-latin1->string (accessor type)
(declare (ignore type))
`(defun ,(make-od-name 'latin1->string accessor) (array astart aend)
(,(make-od-name 'latin->string accessor) array astart aend #'identity)))
(instantiate-octets-definition define-latin1->string)
;;; Multiple names for the :ISO{,-}8859-* families are needed because on
;;; FreeBSD (and maybe other BSD systems), nl_langinfo("LATIN-1") will
;;; return "ISO8859-1" instead of "ISO-8859-1".
(define-external-format (:latin-1 :latin1 :iso-8859-1 :iso8859-1)
1 t
(if (>= bits 256)
(external-format-encoding-error stream bits)
(setf (sap-ref-8 sap tail) bits))
(code-char byte)
latin1->string-aref
string->latin1)
;;; UTF-8
;;; to UTF-8
(declaim (inline char-len-as-utf8))
(defun char-len-as-utf8 (code)
(declare (optimize speed (safety 0))
(type (integer 0 (#.sb!xc:char-code-limit)) code))
(cond ((< code 0) (bug "can't happen"))
((< code #x80) 1)
((< code #x800) 2)
((< code #x10000) 3)
((< code #x110000) 4)
(t (bug "can't happen"))))
(defun string->utf8 (string sstart send null-padding)
(declare (optimize (speed 3) (safety 0))
(type simple-string string)
(type (integer 0 1) null-padding)
(type array-range sstart send))
(macrolet ((ascii-bash ()
'(let ((array (make-array (+ null-padding (- send sstart))
:element-type '(unsigned-byte 8))))
(loop for i from 0
and j from sstart below send
do (setf (aref array i) (char-code (char string j))))
array)))
(etypecase string
((simple-array character (*))
(let ((utf8-length 0))
;; Since it has to fit in a vector, it must be a fixnum!
(declare (type (and unsigned-byte fixnum) utf8-length))
(loop for i of-type index from sstart below send
do (incf utf8-length (char-len-as-utf8 (char-code (char string i)))))
(if (= utf8-length (- send sstart))
(ascii-bash)
(let ((array (make-array (+ null-padding utf8-length)
:element-type '(unsigned-byte 8)))
(index 0))
(declare (type index index))
(flet ((add-byte (b)
(setf (aref array index) b)
(incf index)))
(declare (inline add-byte))
(loop for i of-type index from sstart below send
do (let ((code (char-code (char string i))))
(case (char-len-as-utf8 code)
(1
(add-byte code))
(2
(add-byte (logior #b11000000 (ldb (byte 5 6) code)))
(add-byte (logior #b10000000 (ldb (byte 6 0) code))))
(3
(add-byte (logior #b11100000 (ldb (byte 4 12) code)))
(add-byte (logior #b10000000 (ldb (byte 6 6) code)))
(add-byte (logior #b10000000 (ldb (byte 6 0) code))))
(4
(add-byte (logior #b11110000 (ldb (byte 3 18) code)))
(add-byte (logior #b10000000 (ldb (byte 6 12) code)))
(add-byte (logior #b10000000 (ldb (byte 6 6) code)))
(add-byte (logior #b10000000 (ldb (byte 6 0) code))))))
finally (return array)))))))
#!+sb-unicode
((simple-array base-char (*))
;; On unicode builds BASE-STRINGs are limited to ASCII range,
;; so we can take a fast path -- and get benefit of the element
;; type information. On non-unicode build BASE-CHAR ==
;; CHARACTER.
(ascii-bash))
((simple-array nil (*))
(if (= send sstart)
(make-array 0 :element-type '(unsigned-byte 8))
;; Just get the error...
(aref string sstart))))))
;;; from UTF-8
(defmacro define-bytes-per-utf8-character (accessor type)
(let ((name (make-od-name 'bytes-per-utf8-character accessor)))
`(progn
;;(declaim (inline ,name))
(let ((lexically-max
(string->utf8 (string (code-char ,(1- sb!xc:char-code-limit)))
0 1 0)))
(declare (type (simple-array (unsigned-byte 8) (#!+sb-unicode 4 #!-sb-unicode 2)) lexically-max))
(defun ,name (array pos end)
(declare (optimize speed (safety 0))
(type ,type array)
(type array-range pos end))
;; returns the number of bytes consumed and nil if it's a
;; valid character or the number of bytes consumed and a
;; replacement string if it's not.
(let ((initial-byte (,accessor array pos))
(reject-reason nil)
(reject-position pos)
(remaining-bytes (- end pos)))
(declare (type array-range reject-position remaining-bytes))
(labels ((valid-utf8-starter-byte-p (b)
(declare (type (unsigned-byte 8) b))
(let ((ok (cond
((zerop (logand b #b10000000)) 1)
((= (logand b #b11100000) #b11000000)
2)
((= (logand b #b11110000) #b11100000)
3)
((= (logand b #b11111000) #b11110000)
4)
((= (logand b #b11111100) #b11111000)
5)
((= (logand b #b11111110) #b11111100)
6)
(t
nil))))
(unless ok
(setf reject-reason 'invalid-utf8-starter-byte))
ok))
(enough-bytes-left-p (x)
(let ((ok (> end (+ pos (1- x)))))
(unless ok
(setf reject-reason 'end-of-input-in-character))
ok))
(valid-secondary-p (x)
(let* ((idx (the array-range (+ pos x)))
(b (,accessor array idx))
(ok (= (logand b #b11000000) #b10000000)))
(unless ok
(setf reject-reason 'invalid-utf8-continuation-byte)
(setf reject-position idx))
ok))
(preliminary-ok-for-length (maybe-len len)
(and (eql maybe-len len)
;; Has to be done in this order so that
;; certain broken sequences (e.g., the
;; two-byte sequence `"initial (length 3)"
;; "non-continuation"' -- `#xef #x32')
;; signal only part of that sequence as
;; erroneous.
(loop for i from 1 below (min len remaining-bytes)
always (valid-secondary-p i))
(enough-bytes-left-p len)))
(overlong-chk (x y)
(let ((ok (or (/= initial-byte x)
(/= (logior (,accessor array (the array-range (+ pos 1)))
y)
y))))
(unless ok
(setf reject-reason 'overlong-utf8-sequence))
ok))
(character-below-char-code-limit-p ()
;; This is only called on a four-byte sequence
;; (two in non-unicode builds) to ensure we
;; don't go over SBCL's character limts.
(let ((ok (cond ((< (aref lexically-max 0) (,accessor array pos))
nil)
((> (aref lexically-max 0) (,accessor array pos))
t)
((< (aref lexically-max 1) (,accessor array (+ pos 1)))
nil)
#!+sb-unicode
((> (aref lexically-max 1) (,accessor array (+ pos 1)))
t)
#!+sb-unicode
((< (aref lexically-max 2) (,accessor array (+ pos 2)))
nil)
#!+sb-unicode
((> (aref lexically-max 2) (,accessor array (+ pos 2)))
t)
#!+sb-unicode
((< (aref lexically-max 3) (,accessor array (+ pos 3)))
nil)
(t t))))
(unless ok
(setf reject-reason 'character-out-of-range))
ok)))
(declare (inline valid-utf8-starter-byte-p
enough-bytes-left-p
valid-secondary-p
preliminary-ok-for-length
overlong-chk))
(let ((maybe-len (valid-utf8-starter-byte-p initial-byte)))
(cond ((eql maybe-len 1)
(values 1 nil))
((and (preliminary-ok-for-length maybe-len 2)
(overlong-chk #b11000000 #b10111111)
(overlong-chk #b11000001 #b10111111)
#!-sb-unicode (character-below-char-code-limit-p))
(values 2 nil))
((and (preliminary-ok-for-length maybe-len 3)
(overlong-chk #b11100000 #b10011111)
#!-sb-unicode (not (setf reject-reason 'character-out-of-range)))
(values 3 nil))
((and (preliminary-ok-for-length maybe-len 4)
(overlong-chk #b11110000 #b10001111)
#!-sb-unicode (not (setf reject-reason 'character-out-of-range))
(character-below-char-code-limit-p))
(values 4 nil))
((and (preliminary-ok-for-length maybe-len 5)
(overlong-chk #b11111000 #b10000111)
(not (setf reject-reason 'character-out-of-range)))
(bug "can't happen"))
((and (preliminary-ok-for-length maybe-len 6)
(overlong-chk #b11111100 #b10000011)
(not (setf reject-reason 'character-out-of-range)))
(bug "can't happen"))
(t
(let* ((bad-end (ecase reject-reason
(invalid-utf8-starter-byte
(1+ pos))
(end-of-input-in-character
end)
(invalid-utf8-continuation-byte
reject-position)
((overlong-utf8-sequence character-out-of-range)
(+ pos maybe-len))))
(bad-len (- bad-end pos)))
(declare (type array-range bad-end bad-len))
(let ((replacement (decoding-error array pos bad-end :utf-8 reject-reason reject-position)))
(values bad-len replacement)))))))))))))
(instantiate-octets-definition define-bytes-per-utf8-character)
(defmacro define-simple-get-utf8-char (accessor type)
(let ((name (make-od-name 'simple-get-utf8-char accessor)))
`(progn
(declaim (inline ,name))
(defun ,name (array pos bytes)
(declare (optimize speed (safety 0))
(type ,type array)
(type array-range pos)
(type (integer 1 4) bytes))
(flet ((cref (x)
(,accessor array (the array-range (+ pos x)))))
(declare (inline cref))
(code-char (ecase bytes
(1 (cref 0))
(2 (logior (ash (ldb (byte 5 0) (cref 0)) 6)
(ldb (byte 6 0) (cref 1))))
(3 (logior (ash (ldb (byte 4 0) (cref 0)) 12)
(ash (ldb (byte 6 0) (cref 1)) 6)
(ldb (byte 6 0) (cref 2))))
(4 (logior (ash (ldb (byte 3 0) (cref 0)) 18)
(ash (ldb (byte 6 0) (cref 1)) 12)
(ash (ldb (byte 6 0) (cref 2)) 6)
(ldb (byte 6 0) (cref 3)))))))))))
(instantiate-octets-definition define-simple-get-utf8-char)
(defmacro define-utf8->string (accessor type)
(let ((name (make-od-name 'utf8->string accessor)))
`(progn
(defun ,name (array astart aend)
(declare (optimize speed (safety 0))
(type ,type array)
(type array-range astart aend))
(let ((string (make-array 0 :adjustable t :fill-pointer 0 :element-type 'character)))
(loop with pos = astart
while (< pos aend)
do (multiple-value-bind (bytes invalid)
(,(make-od-name 'bytes-per-utf8-character accessor) array pos aend)
(declare (type (or null string) invalid))
(cond
((null invalid)
(vector-push-extend (,(make-od-name 'simple-get-utf8-char accessor) array pos bytes) string))
(t
(dotimes (i (length invalid))
(vector-push-extend (char invalid i) string))))
(incf pos bytes)))
(coerce string 'simple-string))))))
(instantiate-octets-definition define-utf8->string)
(define-external-format/variable-width (:utf-8 :utf8) nil
(let ((bits (char-code byte)))
(cond ((< bits #x80) 1)
((< bits #x800) 2)
((< bits #x10000) 3)
(t 4)))
(ecase size
(1 (setf (sap-ref-8 sap tail) bits))
(2 (setf (sap-ref-8 sap tail) (logior #xc0 (ldb (byte 5 6) bits))
(sap-ref-8 sap (+ 1 tail)) (logior #x80 (ldb (byte 6 0) bits))))
(3 (setf (sap-ref-8 sap tail) (logior #xe0 (ldb (byte 4 12) bits))
(sap-ref-8 sap (+ 1 tail)) (logior #x80 (ldb (byte 6 6) bits))
(sap-ref-8 sap (+ 2 tail)) (logior #x80 (ldb (byte 6 0) bits))))
(4 (setf (sap-ref-8 sap tail) (logior #xf0 (ldb (byte 3 18) bits))
(sap-ref-8 sap (+ 1 tail)) (logior #x80 (ldb (byte 6 12) bits))
(sap-ref-8 sap (+ 2 tail)) (logior #x80 (ldb (byte 6 6) bits))
(sap-ref-8 sap (+ 3 tail)) (logior #x80 (ldb (byte 6 0) bits)))))
(cond ((< byte #x80) 1)
((< byte #xc2) (return-from decode-break-reason 1))
((< byte #xe0) 2)
((< byte #xf0) 3)
(t 4))
(code-char (ecase size
(1 byte)
(2 (let ((byte2 (sap-ref-8 sap (1+ head))))
(unless (<= #x80 byte2 #xbf)
(return-from decode-break-reason 2))
(dpb byte (byte 5 6) byte2)))
(3 (let ((byte2 (sap-ref-8 sap (1+ head)))
(byte3 (sap-ref-8 sap (+ 2 head))))
(unless (and (<= #x80 byte2 #xbf)
(<= #x80 byte3 #xbf))
(return-from decode-break-reason 3))
(dpb byte (byte 4 12) (dpb byte2 (byte 6 6) byte3))))
(4 (let ((byte2 (sap-ref-8 sap (1+ head)))
(byte3 (sap-ref-8 sap (+ 2 head)))
(byte4 (sap-ref-8 sap (+ 3 head))))
(unless (and (<= #x80 byte2 #xbf)
(<= #x80 byte3 #xbf)
(<= #x80 byte4 #xbf))
(return-from decode-break-reason 4))
(dpb byte (byte 3 18)
(dpb byte2 (byte 6 12)
(dpb byte3 (byte 6 6) byte4)))))))
utf8->string-aref
string->utf8)
Index: enc-cyr.lisp
===================================================================
RCS file: /cvsroot/sbcl/sbcl/src/code/external-formats/enc-cyr.lisp,v
retrieving revision 1.5
retrieving revision 1.6
diff -u -d -r1.5 -r1.6
--- enc-cyr.lisp 6 May 2007 02:28:43 -0000 1.5
+++ enc-cyr.lisp 29 Sep 2009 01:02:20 -0000 1.6
@@ -160,9 +160,6 @@
(instantiate-octets-definition define-koi8-r->string)
-(add-external-format-funs '(:koi8-r :|koi8-r|)
- '(koi8-r->string-aref string->koi8-r))
-
(define-external-format (:koi8-r :|koi8-r|)
1 t
(let ((koi8-r-byte (code->koi8-r-mapper bits)))
@@ -172,7 +169,9 @@
(let ((code (koi8-r->code-mapper byte)))
(if code
(code-char code)
- (external-format-decoding-error stream byte)))) ;; TODO -- error check
+ (external-format-decoding-error stream byte)))
+ koi8-r->string-aref
+ string->koi8-r) ;; TODO -- error check
(define-unibyte-mapper koi8-u->code-mapper code->koi8-u-mapper
(#x80 #x2500) ; BOX DRAWINGS LIGHT HORIZONTAL
@@ -334,9 +333,6 @@
(instantiate-octets-definition define-koi8-u->string)
-(add-external-format-funs '(:koi8-u :|koi8-u|)
- '(koi8-u->string-aref string->koi8-u))
-
(define-external-format (:koi8-u :|koi8-u|)
1 t
(let ((koi8-u-byte (code->koi8-u-mapper bits)))
@@ -346,7 +342,9 @@
(let ((code (koi8-u->code-mapper byte)))
(if code
(code-char code)
- (external-format-decoding-error stream byte)))) ;; TODO -- error check
+ (external-format-decoding-error stream byte)))
+ koi8-u->string-aref
+ string->koi8-u) ;; TODO -- error check
(define-unibyte-mapper x-mac-cyrillic->code-mapper code->x-mac-cyrillic-mapper
(#x80 #x0410) ; CYRILLIC CAPITAL LETTER A
@@ -503,9 +501,6 @@
(instantiate-octets-definition define-x-mac-cyrillic->string)
-(add-external-format-funs '(:x-mac-cyrillic :|x-mac-cyrillic|)
- '(x-mac-cyrillic->string-aref string->x-mac-cyrillic))
-
(define-external-format (:x-mac-cyrillic :|x-mac-cyrillic|)
1 t
(let ((x-mac-cyrillic-byte (code->x-mac-cyrillic-mapper bits)))
@@ -515,4 +510,6 @@
(let ((code (x-mac-cyrillic->code-mapper byte)))
(if code
(code-char code)
- (external-format-decoding-error stream byte)))) ;; TODO -- error check
+ (external-format-decoding-error stream byte)))
+ x-mac-cyrillic->string-aref
+ string->x-mac-cyrillic) ;; TODO -- error check
Index: enc-dos.lisp
===================================================================
RCS file: /cvsroot/sbcl/sbcl/src/code/external-formats/enc-dos.lisp,v
retrieving revision 1.5
retrieving revision 1.6
diff -u -d -r1.5 -r1.6
--- enc-dos.lisp 6 May 2007 02:28:43 -0000 1.5
+++ enc-dos.lisp 29 Sep 2009 01:02:20 -0000 1.6
@@ -160,9 +160,6 @@
(instantiate-octets-definition define-cp437->string)
-(add-external-format-funs '(:cp437 :|cp437|)
- '(cp437->string-aref string->cp437))
-
(define-external-format (:cp437 :|cp437|)
1 t
(let ((cp437-byte (code->cp437-mapper bits)))
@@ -172,7 +169,9 @@
(let ((code (cp437->code-mapper byte)))
(if code
(code-char code)
- (external-format-decoding-error stream byte)))) ;; TODO -- error check
+ (external-format-decoding-error stream byte)))
+ cp437->string-aref
+ string->cp437) ;; TODO -- error check
(define-unibyte-mapper cp850->code-mapper code->cp850-mapper
(#x80 #x00C7) ; LATIN CAPITAL LETTER C WITH CEDILLA
@@ -334,9 +333,6 @@
(instantiate-octets-definition define-cp850->string)
-(add-external-format-funs '(:cp850 :|cp850|)
- '(cp850->string-aref string->cp850))
-
(define-external-format (:cp850 :|cp850|)
1 t
(let ((cp850-byte (code->cp850-mapper bits)))
@@ -346,7 +342,9 @@
(let ((code (cp850->code-mapper byte)))
(if code
(code-char code)
- (external-format-decoding-error stream byte)))) ;; TODO -- error check
+ (external-format-decoding-error stream byte)))
+ cp850->string-aref
+ string->cp850) ;; TODO -- error check
(define-unibyte-mapper cp852->code-mapper code->cp852-mapper
(#x80 #x00C7) ; LATIN CAPITAL LETTER C WITH CEDILLA
@@ -508,9 +506,6 @@
(instantiate-octets-definition define-cp852->string)
-(add-external-format-funs '(:cp852 :|cp852|)
- '(cp852->string-aref string->cp852))
-
(define-external-format (:cp852 :|cp852|)
1 t
(let ((cp852-byte (code->cp852-mapper bits)))
@@ -520,7 +515,9 @@
(let ((code (cp852->code-mapper byte)))
(if code
(code-char code)
- (external-format-decoding-error stream byte)))) ;; TODO -- error check
+ (external-format-decoding-error stream byte)))
+ cp852->string-aref
+ string->cp852) ;; TODO -- error check
(define-unibyte-mapper cp855->code-mapper code->cp855-mapper
(#x80 #x0452) ; CYRILLIC SMALL LETTER DJE
@@ -682,9 +679,6 @@
(instantiate-octets-definition define-cp855->string)
-(add-external-format-funs '(:cp855 :|cp855|)
- '(cp855->string-aref string->cp855))
-
(define-external-format (:cp855 :|cp855|)
1 t
(let ((cp855-byte (code->cp855-mapper bits)))
@@ -694,7 +688,9 @@
(let ((code (cp855->code-mapper byte)))
(if code
(code-char code)
- (external-format-decoding-error stream byte)))) ;; TODO -- error check
+ (external-format-decoding-error stream byte)))
+ cp855->string-aref
+ string->cp855) ;; TODO -- error check
(define-unibyte-mapper cp857->code-mapper code->cp857-mapper
(#x80 #x00C7) ; LATIN CAPITAL LETTER C WITH CEDILLA
@@ -855,9 +851,6 @@
(instantiate-octets-definition define-cp857->string)
-(add-external-format-funs '(:cp857 :|cp857|)
- '(cp857->string-aref string->cp857))
-
(define-external-format (:cp857 :|cp857|)
1 t
(let ((cp857-byte (code->cp857-mapper bits)))
@@ -867,7 +860,9 @@
(let ((code (cp857->code-mapper byte)))
(if code
(code-char code)
- (external-format-decoding-error stream byte)))) ;; TODO -- error check
+ (external-format-decoding-error stream byte)))
+ cp857->string-aref
+ string->cp857) ;; TODO -- error check
(define-unibyte-mapper cp860->code-mapper code->cp860-mapper
(#x80 #x00C7) ; LATIN CAPITAL LETTER C WITH CEDILLA
@@ -1029,9 +1024,6 @@
(instantiate-octets-definition define-cp860->string)
-(add-external-format-funs '(:cp860 :|cp860|)
- '(cp860->string-aref string->cp860))
-
(define-external-format (:cp860 :|cp860|)
1 t
(let ((cp860-byte (code->cp860-mapper bits)))
@@ -1041,7 +1033,9 @@
(let ((code (cp860->code-mapper byte)))
(if code
(code-char code)
- (external-format-decoding-error stream byte)))) ;; TODO -- error check
+ (external-format-decoding-error stream byte)))
+ cp860->string-aref
+ string->cp860) ;; TODO -- error check
(define-unibyte-mapper cp861->code-mapper code->cp861-mapper
(#x80 #x00C7) ; LATIN CAPITAL LETTER C WITH CEDILLA
@@ -1203,9 +1197,6 @@
(instantiate-octets-definition define-cp861->string)
-(add-external-format-funs '(:cp861 :|cp861|)
- '(cp861->string-aref string->cp861))
-
(define-external-format (:cp861 :|cp861|)
1 t
(let ((cp861-byte (code->cp861-mapper bits)))
@@ -1215,7 +1206,9 @@
(let ((code (cp861->code-mapper byte)))
(if code
(code-char code)
- (external-format-decoding-error stream byte)))) ;; TODO -- error check
+ (external-format-decoding-error stream byte)))
+ cp861->string-aref
+ string->cp861) ;; TODO -- error check
(define-unibyte-mapper cp862->code-mapper code->cp862-mapper
(#x80 #x05D0) ; HEBREW LETTER ALEF
@@ -1377,9 +1370,6 @@
(instantiate-octets-definition define-cp862->string)
-(add-external-format-funs '(:cp862 :|cp862|)
- '(cp862->string-aref string->cp862))
-
(define-external-format (:cp862 :|cp862|)
1 t
(let ((cp862-byte (code->cp862-mapper bits)))
@@ -1389,7 +1379,9 @@
(let ((code (cp862->code-mapper byte)))
(if code
(code-char code)
- (external-format-decoding-error stream byte)))) ;; TODO -- error check
+ (external-format-decoding-error stream byte)))
+ cp862->string-aref
+ string->cp862) ;; TODO -- error check
(define-unibyte-mapper cp863->code-mapper code->cp863-mapper
(#x80 #x00C7) ; LATIN CAPITAL LETTER C WITH CEDILLA
@@ -1551,9 +1543,6 @@
(instantiate-octets-definition define-cp863->string)
-(add-external-format-funs '(:cp863 :|cp863|)
- '(cp863->string-aref string->cp863))
-
(define-external-format (:cp863 :|cp863|)
1 t
(let ((cp863-byte (code->cp863-mapper bits)))
@@ -1563,7 +1552,9 @@
(let ((code (cp863->code-mapper byte)))
(if code
(code-char code)
- (external-format-decoding-error stream byte)))) ;; TODO -- error check
+ (external-format-decoding-error stream byte)))
+ cp863->string-aref
+ string->cp863) ;; TODO -- error check
(define-unibyte-mapper cp864->code-mapper code->cp864-mapper
(#x80 #x00B0) ; DEGREE SIGN
@@ -1722,9 +1713,6 @@
(instantiate-octets-definition define-cp864->string)
-(add-external-format-funs '(:cp864 :|cp864|)
- '(cp864->string-aref string->cp864))
-
(define-external-format (:cp864 :|cp864|)
1 t
(let ((cp864-byte (code->cp864-mapper bits)))
@@ -1734,7 +1722,9 @@
(let ((code (cp864->code-mapper byte)))
(if code
(code-char code)
- (external-format-decoding-error stream byte)))) ;; TODO -- error check
+ (external-format-decoding-error stream byte)))
+ cp864->string-aref
+ string->cp864) ;; TODO -- error check
(define-unibyte-mapper cp865->code-mapper code->cp865-mapper
(#x80 #x00C7) ; LATIN CAPITAL LETTER C WITH CEDILLA
@@ -1896,9 +1886,6 @@
(instantiate-octets-definition define-cp865->string)
-(add-external-format-funs '(:cp865 :|cp865|)
- '(cp865->string-aref string->cp865))
-
(define-external-format (:cp865 :|cp865|)
1 t
(let ((cp865-byte (code->cp865-mapper bits)))
@@ -1908,7 +1895,9 @@
(let ((code (cp865->code-mapper byte)))
(if code
(code-char code)
- (external-format-decoding-error stream byte)))) ;; TODO -- error check
+ (external-format-decoding-error stream byte)))
+ cp865->string-aref
+ string->cp865) ;; TODO -- error check
(define-unibyte-mapper cp866->code-mapper code->cp866-mapper
(#x80 #x0410) ; CYRILLIC CAPITAL LETTER A
@@ -2070,9 +2059,6 @@
(instantiate-octets-definition define-cp866->string)
-(add-external-format-funs '(:cp866 :|cp866|)
- '(cp866->string-aref string->cp866))
-
(define-external-format (:cp866 :|cp866|)
1 t
(let ((cp866-byte (code->cp866-mapper bits)))
@@ -2082,7 +2068,9 @@
(let ((code (cp866->code-mapper byte)))
(if code
(code-char code)
- (external-format-decoding-error stream byte)))) ;; TODO -- error check
+ (external-format-decoding-error stream byte)))
+ cp866->string-aref
+ string->cp866) ;; TODO -- error check
(define-unibyte-mapper cp869->code-mapper code->cp869-mapper
(#x80 nil)
@@ -2244,9 +2232,6 @@
(instantiate-octets-definition define-cp869->string)
-(add-external-format-funs '(:cp869 :|cp869|)
- '(cp869->string-aref string->cp869))
-
(define-external-format (:cp869 :|cp869|)
1 t
(let ((cp869-byte (code->cp869-mapper bits)))
@@ -2256,7 +2241,9 @@
(let ((code (cp869->code-mapper byte)))
(if code
(code-char code)
- (external-format-decoding-error stream byte)))) ;; TODO -- error check
+ (external-format-decoding-error stream byte)))
+ cp869->string-aref
+ string->cp869) ;; TODO -- error check
(define-unibyte-mapper cp874->code-mapper code->cp874-mapper
(#x80 #x20AC) ; EURO SIGN
@@ -2417,9 +2404,6 @@
(instantiate-octets-definition define-cp874->string)
-(add-external-format-funs '(:cp874 :|cp874|)
- '(cp874->string-aref string->cp874))
-
(define-external-format (:cp874 :|cp874|)
1 t
(let ((cp874-byte (code->cp874-mapper bits)))
@@ -2429,4 +2413,6 @@
(let ((code (cp874->code-mapper byte)))
(if code
(code-char code)
- (external-format-decoding-error stream byte)))) ;; TODO -- error check
+ (external-format-decoding-error stream byte)))
+ cp874->string-aref
+ string->cp874) ;; TODO -- error check
Index: enc-ebcdic.lisp
===================================================================
RCS file: /cvsroot/sbcl/sbcl/src/code/external-formats/enc-ebcdic.lisp,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -d -r1.1 -r1.2
--- enc-ebcdic.lisp 7 Aug 2009 11:21:22 -0000 1.1
+++ enc-ebcdic.lisp 29 Sep 2009 01:02:20 -0000 1.2
@@ -61,13 +61,12 @@
(,(make-od-name 'latin->string accessor) array astart aend #'ebcdic-us->code-mapper)))
(instantiate-octets-definition define-ebcdic-us->string)
-(add-external-format-funs '(:ebcdic-us :cp037 :|cp037| :ibm-037 :ibm037)
- '(ebcdic-us->string-aref string->ebcdic-us))
-
(define-external-format (:ebcdic-us :cp037 :|cp037| :ibm-037 :ibm037)
1 t
(let ((ebcdic-us-byte (code->ebcdic-us-mapper bits)))
(if ebcdic-us-byte
(setf (sap-ref-8 sap tail) ebcdic-us-byte)
(external-format-encoding-error stream bits)))
- (code-char (ebcdic-us->code-mapper byte)))
+ (code-char (ebcdic-us->code-mapper byte))
+ ebcdic-us->string-aref
+ string->ebcdic-us)
Index: enc-iso.lisp
===================================================================
RCS file: /cvsroot/sbcl/sbcl/src/code/external-formats/enc-iso.lisp,v
retrieving revision 1.6
retrieving revision 1.7
diff -u -d -r1.6 -r1.7
--- enc-iso.lisp 6 May 2007 02:28:43 -0000 1.6
+++ enc-iso.lisp 29 Sep 2009 01:02:20 -0000 1.7
@@ -89,9 +89,6 @@
(instantiate-octets-definition define-iso-8859-2->string)
-(add-external-format-funs '(:iso-8859-2 :|iso-8859-2| :latin-2 :|latin-2|)
- '(iso-8859-2->string-aref string->iso-8859-2))
-
(define-external-format (:iso-8859-2 :|iso-8859-2| :latin-2 :|latin-2|)
1 t
(let ((iso-8859-2-byte (code->iso-8859-2-mapper bits)))
@@ -101,7 +98,9 @@
(let ((code (iso-8859-2->code-mapper byte)))
(if code
(code-char code)
- (external-format-decoding-error stream byte)))) ;; TODO -- error check
+ (external-format-decoding-error stream byte)))
+ iso-8859-2->string-aref
+ string->iso8859-2) ;; TODO -- error check
(define-unibyte-mapper iso-8859-3->code-mapper code->iso-8859-3-mapper
(#xA1 #x0126) ; LATIN CAPITAL LETTER H WITH STROKE
@@ -170,9 +169,6 @@
(instantiate-octets-definition define-iso-8859-3->string)
-(add-external-format-funs '(:iso-8859-3 :|iso-8859-3| :latin-3 :|latin-3|)
- '(iso-8859-3->string-aref string->iso-8859-3))
-
(define-external-format (:iso-8859-3 :|iso-8859-3| :latin-3 :|latin-3|)
1 t
(let ((iso-8859-3-byte (code->iso-8859-3-mapper bits)))
@@ -182,7 +178,9 @@
(let ((code (iso-8859-3->code-mapper byte)))
(if code
(code-char code)
- (external-format-decoding-error stream byte)))) ;; TODO -- error check
+ (external-format-decoding-error stream byte)))
+ iso-8859-3->string-aref
+ string->iso-8859-3) ;; TODO -- error check
(define-unibyte-mapper iso-8859-4->code-mapper code->iso-8859-4-mapper
(#xA1 #x0104) ; LATIN CAPITAL LETTER A WITH OGONEK
@@ -266,9 +264,6 @@
(instantiate-octets-definition define-iso-8859-4->string)
-(add-external-format-funs '(:iso-8859-4 :|iso-8859-4| :latin-4 :|latin-4|)
- '(iso-8859-4->string-aref string->iso-8859-4))
-
(define-external-format (:iso-8859-4 :|iso-8859-4| :latin-4 :|latin-4|)
1 t
(let ((iso-8859-4-byte (code->iso-8859-4-mapper bits)))
@@ -278,7 +273,9 @@
(let ((code (iso-8859-4->code-mapper byte)))
(if code
(code-char code)
- (external-format-decoding-error stream byte)))) ;; TODO -- error check
+ (external-format-decoding-error stream byte)))
+ iso-8859-4->string-aref
+ string->iso-8859-4) ;; TODO -- error check
(define-unibyte-mapper iso-8859-5->code-mapper code->iso-8859-5-mapper
(#xA1 #x0401) ; CYRILLIC CAPITAL LETTER IO
@@ -406,9 +403,6 @@
(instantiate-octets-definition define-iso-8859-5->string)
-(add-external-format-funs '(:iso-8859-5 :|iso-8859-5|)
- '(iso-8859-5->string-aref string->iso-8859-5))
-
(define-external-format (:iso-8859-5 :|iso-8859-5|)
1 t
(let ((iso-8859-5-byte (code->iso-8859-5-mapper bits)))
@@ -418,7 +412,9 @@
(let ((code (iso-8859-5->code-mapper byte)))
(if code
(code-char code)
- (external-format-decoding-error stream byte)))) ;; TODO -- error check
+ (external-format-decoding-error stream byte)))
+ iso-8859-5->string-aref
+ string->iso-8859-5) ;; TODO -- error check
(define-unibyte-mapper iso-8859-6->code-mapper code->iso-8859-6-mapper
(#xA1 nil)
@@ -545,9 +541,6 @@
(instantiate-octets-definition define-iso-8859-6->string)
-(add-external-format-funs '(:iso-8859-6 :|iso-8859-6|)
- '(iso-8859-6->string-aref string->iso-8859-6))
-
(define-external-format (:iso-8859-6 :|iso-8859-6|)
1 t
(let ((iso-8859-6-byte (code->iso-8859-6-mapper bits)))
@@ -557,7 +550,9 @@
(let ((code (iso-8859-6->code-mapper byte)))
(if code
(code-char code)
- (external-format-decoding-error stream byte)))) ;; TODO -- error check
+ (external-format-decoding-error stream byte)))
+ iso-8859-6->string-aref
+ string->iso-8859-6) ;; TODO -- error check
(define-unibyte-mapper iso-8859-7->code-mapper code->iso-8859-7-mapper
(#xA1 #x02BD) ; MODIFIER LETTER REVERSED COMMA
@@ -671,9 +666,6 @@
(instantiate-octets-definition define-iso-8859-7->string)
-(add-external-format-funs '(:iso-8859-7 :|iso-8859-7|)
- '(iso-8859-7->string-aref string->iso-8859-7))
-
(define-external-format (:iso-8859-7 :|iso-8859-7|)
1 t
(let ((iso-8859-7-byte (code->iso-8859-7-mapper bits)))
@@ -683,7 +675,9 @@
(let ((code (iso-8859-7->code-mapper byte)))
(if code
(code-char code)
- (external-format-decoding-error stream byte)))) ;; TODO -- error check
+ (external-format-decoding-error stream byte)))
+ iso-8859-7->string-aref
+ string->iso-8859-7) ;; TODO -- error check
(define-unibyte-mapper iso-8859-8->code-mapper code->iso-8859-8-mapper
(#xA1 nil)
@@ -786,9 +780,6 @@
(instantiate-octets-definition define-iso-8859-8->string)
-(add-external-format-funs '(:iso-8859-8 :|iso-8859-8|)
- '(iso-8859-8->string-aref string->iso-8859-8))
-
(define-external-format (:iso-8859-8 :|iso-8859-8|)
1 t
(let ((iso-8859-8-byte (code->iso-8859-8-mapper bits)))
@@ -798,7 +789,9 @@
(let ((code (iso-8859-8->code-mapper byte)))
(if code
(code-char code)
- (external-format-decoding-error stream byte)))) ;; TODO -- error check
+ (external-format-decoding-error stream byte)))
+ iso-8859-8->string-aref
+ string->iso-8859-8) ;; TODO -- error check
(define-unibyte-mapper iso-8859-9->code-mapper code->iso-8859-9-mapper
(#xD0 #x011E) ; LATIN CAPITAL LETTER G WITH BREVE
@@ -838,9 +831,6 @@
(instantiate-octets-definition define-iso-8859-9->string)
-(add-external-format-funs '(:iso-8859-9 :|iso-8859-9| :latin-5 :|latin-5|)
- '(iso-8859-9->string-aref string->iso-8859-9))
-
(define-external-format (:iso-8859-9 :|iso-8859-9| :latin-5 :|latin-5|)
1 t
(let ((iso-8859-9-byte (code->iso-8859-9-mapper bits)))
@@ -850,7 +840,9 @@
(let ((code (iso-8859-9->code-mapper byte)))
(if code
(code-char code)
- (external-format-decoding-error stream byte)))) ;; TODO -- error check
+ (external-format-decoding-error stream byte)))
+ iso-8859-9->string-aref
+ string->iso-8859-9) ;; TODO -- error check
(define-unibyte-mapper iso-8859-10->code-mapper code->iso-8859-10-mapper
(#xA1 #x0104) ; LATIN CAPITAL LETTER A WITH OGONEK
@@ -930,9 +922,6 @@
(instantiate-octets-definition define-iso-8859-10->string)
-(add-external-format-funs '(:iso-8859-10 :|iso-8859-10| :latin-6 :|latin-6|)
- '(iso-8859-10->string-aref string->iso-8859-10))
-
(define-external-format (:iso-8859-10 :|iso-8859-10| :latin-6 :|latin-6|)
1 t
(let ((iso-8859-10-byte (code->iso-8859-10-mapper bits)))
@@ -942,7 +931,9 @@
(let ((code (iso-8859-10->code-mapper byte)))
(if code
(code-char code)
- (external-format-decoding-error stream byte)))) ;; TODO -- error check
+ (external-format-decoding-error stream byte)))
+ iso-8859-10->string-aref
+ string->iso-8859-10) ;; TODO -- error check
(define-unibyte-mapper iso-8859-11->code-mapper code->iso-8859-11-mapper
(#xA1 #x0E01) ; THAI CHARACTER KO KAI
@@ -1071,9 +1062,6 @@
(instantiate-octets-definition define-iso-8859-11->string)
-(add-external-format-funs '(:iso-8859-11 :|iso-8859-11|)
- '(iso-8859-11->string-aref string->iso-8859-11))
-
(define-external-format (:iso-8859-11 :|iso-8859-11|)
1 t
(let ((iso-8859-11-byte (code->iso-8859-11-mapper bits)))
@@ -1083,7 +1071,9 @@
(let ((code (iso-8859-11->code-mapper byte)))
(if code
(code-char code)
- (external-format-decoding-error stream byte)))) ;; TODO -- error check
+ (external-format-decoding-error stream byte)))
+ iso-8859-11->string-aref
+ string->iso-8859-11) ;; TODO -- error check
(define-unibyte-mapper iso-8859-13->code-mapper code->iso-8859-13-mapper
(#xA1 #x201D) ; RIGHT DOUBLE QUOTATION MARK
@@ -1173,9 +1163,6 @@
(instantiate-octets-definition define-iso-8859-13->string)
-(add-external-format-funs '(:iso-8859-13 :|iso-8859-13| :latin-7 :|latin-7|)
- '(iso-8859-13->string-aref string->iso-8859-13))
-
(define-external-format (:iso-8859-13 :|iso-8859-13| :latin-7 :|latin-7|)
1 t
(let ((iso-8859-13-byte (code->iso-8859-13-mapper bits)))
@@ -1185,7 +1172,9 @@
(let ((code (iso-8859-13->code-mapper byte)))
(if code
(code-char code)
- (external-format-decoding-error stream byte)))) ;; TODO -- error check
+ (external-format-decoding-error stream byte)))
+ iso-8859-13->string-aref
+ string->iso-8859-13) ;; TODO -- error check
(define-unibyte-mapper iso-8859-14->code-mapper code->iso-8859-14-mapper
(#xA1 #x1E02) ; LATIN CAPITAL LETTER B WITH DOT ABOVE
@@ -1250,9 +1239,6 @@
(instantiate-octets-definition define-iso-8859-14->string)
-(add-external-format-funs '(:iso-8859-14 :|iso-8859-14| :latin-8 :|latin-8|)
- '(iso-8859-14->string-aref string->iso-8859-14))
-
(define-external-format (:iso-8859-14 :|iso-8859-14| :latin-8 :|latin-8|)
1 t
(let ((iso-8859-14-byte (code->iso-8859-14-mapper bits)))
@@ -1262,4 +1248,59 @@
(let ((code (iso-8859-14->code-mapper byte)))
(if code
(code-char code)
- (external-format-decoding-error stream byte)))) ;; TODO -- error check
+ (external-format-decoding-error stream byte)))
+ iso-8859-14->string-aref
+ string->iso-8859-14) ;; TODO -- error check
+
+(define-unibyte-mapper
+ latin9->code-mapper
+ code->latin9-mapper
+ (#xA4 #x20AC)
+ (#xA6 #x0160)
+ (#xA8 #x0161)
+ (#xB4 #x017D)
+ (#xB8 #x017E)
+ (#xBC #x0152)
+ (#xBD #x0153)
+ (#xBE #x0178))
+
+(declaim (inline get-latin9-bytes))
+(defun get-latin9-bytes (string pos)
+ (declare (optimize speed (safety 0))
+ (type simple-string string)
+ (type array-range pos))
+ (get-latin-bytes #'code->latin9-mapper :latin-9 string pos))
+
+(defun string->latin9 (string sstart send null-padding)
+ (declare (optimize speed (safety 0))
+ (type simple-string string)
+ (type array-range sstart send))
+ (values (string->latin% string sstart send #'get-latin9-bytes null-padding)))
+
+(defmacro define-latin9->string* (accessor type)
+ (declare (ignore type))
+ (let ((name (make-od-name 'latin9->string* accessor)))
+ `(progn
+ (defun ,name (string sstart send array astart aend)
+ (,(make-od-name 'latin->string* accessor) string sstart send array astart aend #'latin9->code-mapper)))))
+(instantiate-octets-definition define-latin9->string*)
+
+(defmacro define-latin9->string (accessor type)
+ (declare (ignore type))
+ `(defun ,(make-od-name 'latin9->string accessor) (array astart aend)
+ (,(make-od-name 'latin->string accessor) array astart aend #'latin9->code-mapper)))
+ (instantiate-octets-definition define-latin9->string)
+
+;;; The names for latin9 are different due to a historical accident.
+(define-external-format (:latin-9 :latin9 :iso-8859-15 :iso8859-15)
+ 1 t
+ (let ((latin-9-byte (code->latin9-mapper bits)))
+ (if latin-9-byte
+ (setf (sap-ref-8 sap tail) latin-9-byte)
+ (external-format-encoding-error stream bits)))
+ (let ((code (latin9->code-mapper byte)))
+ (if code
+ (code-char code)
+ (external-format-decoding-error stream byte)))
+ latin9->string-aref
+ string->latin9)
Index: enc-win.lisp
===================================================================
RCS file: /cvsroot/sbcl/sbcl/src/code/external-formats/enc-win.lisp,v
retrieving revision 1.5
retrieving revision 1.6
diff -u -d -r1.5 -r1.6
--- enc-win.lisp 6 May 2007 02:28:43 -0000 1.5
+++ enc-win.lisp 29 Sep 2009 01:02:20 -0000 1.6
@@ -111,9 +111,6 @@
(instantiate-octets-definition define-cp1250->string)
-(add-external-format-funs '(:cp1250 :|cp1250| :windows-1250 :|windows-1250|)
- '(cp1250->string-aref string->cp1250))
-
(define-external-format (:cp1250 :|cp1250| :windows-1250 :|windows-1250|)
1 t
(let ((cp1250-byte (code->cp1250-mapper bits)))
@@ -123,7 +120,9 @@
(let ((code (cp1250->code-mapper byte)))
(if code
(code-char code)
- (external-format-decoding-error stream byte)))) ;; TODO -- error check
+ (external-format-decoding-error stream byte)))
+ cp1250->string-aref
+ string->cp1250) ;; TODO -- error check
(define-unibyte-mapper cp1251->code-mapper code->cp1251-mapper
(#x80 #x0402) ; CYRILLIC CAPITAL LETTER DJE
@@ -270,9 +269,6 @@
(instantiate-octets-definition define-cp1251->string)
-(add-external-format-funs '(:cp1251 :|cp1251| :windows-1251 :|windows-1251|)
- '(cp1251->string-aref string->cp1251))
-
(define-external-format (:cp1251 :|cp1251| :windows-1251 :|windows-1251|)
1 t
(let ((cp1251-byte (code->cp1251-mapper bits)))
@@ -282,7 +278,9 @@
(let ((code (cp1251->code-mapper byte)))
(if code
(code-char code)
- (external-format-decoding-error stream byte)))) ;; TODO -- error check
+ (external-format-decoding-error stream byte)))
+ cp1251->string-aref
+ string->cp1251) ;; TODO -- error check
(define-unibyte-mapper cp1252->code-mapper code->cp1252-mapper
(#x80 #x20AC) ; EURO SIGN
@@ -348,9 +346,6 @@
(instantiate-octets-definition define-cp1252->string)
-(add-external-format-funs '(:cp1252 :|cp1252| :windows-1252 :|windows-1252|)
- '(cp1252->string-aref string->cp1252))
-
(define-external-format (:cp1252 :|cp1252| :windows-1252 :|windows-1252|)
1 t
(let ((cp1252-byte (code->cp1252-mapper bits)))
@@ -360,7 +355,9 @@
(let ((code (cp1252->code-mapper byte)))
(if code
(code-char code)
- (external-format-decoding-error stream byte)))) ;; TODO -- error check
+ (external-format-decoding-error stream byte)))
+ cp1252->string-aref
+ string->cp1252) ;; TODO -- error check
(define-unibyte-mapper cp1253->code-mapper code->cp1253-mapper
(#x80 #x20AC) ; EURO SIGN
@@ -501,9 +498,6 @@
(instantiate-octets-definition define-cp1253->string)
-(add-external-format-funs '(:cp1253 :|cp1253| :windows-1253 :|windows-1253|)
- '(cp1253->string-aref string->cp1253))
-
(define-external-format (:cp1253 :|cp1253| :windows-1253 :|windows-1253|)
1 t
(let ((cp1253-byte (code->cp1253-mapper bits)))
@@ -513,7 +507,9 @@
(let ((code (cp1253->code-mapper byte)))
(if code
(code-char code)
- (external-format-decoding-error stream byte)))) ;; TODO -- error check
+ (external-format-decoding-error stream byte)))
+ cp1253->string-aref
+ string->cp1253) ;; TODO -- error check
(define-unibyte-mapper cp1254->code-mapper code->cp1254-mapper
(#x80 #x20AC) ; EURO SIGN
@@ -585,9 +581,6 @@
(instantiate-octets-definition define-cp1254->string)
-(add-external-format-funs '(:cp1254 :|cp1254| :windows-1254 :|windows-1254|)
- '(cp1254->string-aref string->cp1254))
-
(define-external-format (:cp1254 :|cp1254|)
1 t
(let ((cp1254-byte (code->cp1254-mapper bits)))
@@ -597,7 +590,9 @@
(let ((code (cp1254->code-mapper byte)))
(if code
(code-char code)
- (external-format-decoding-error stream byte)))) ;; TODO -- error check
+ (external-format-decoding-error stream byte)))
+ cp1254->string-aref
+ string->cp1254) ;; TODO -- error check
(define-unibyte-mapper cp1255->code-mapper code->cp1255-mapper
(#x80 #x20AC) ; EURO SIGN
@@ -730,9 +725,6 @@
(instantiate-octets-definition define-cp1255->string)
-(add-external-format-funs '(:cp1255 :|cp1255| :windows-1255 :|windows-1255|)
- '(cp1255->string-aref string->cp1255))
-
(define-external-format (:cp1255 :|cp1255| :windows-1255 :|windows-1255|)
1 t
(let ((cp1255-byte (code->cp1255-mapper bits)))
@@ -742,7 +734,9 @@
(let ((code (cp1255->code-mapper byte)))
(if code
(code-char code)
- (external-format-decoding-error stream byte)))) ;; TODO -- error check
+ (external-format-decoding-error stream byte)))
+ cp1255->string-aref
+ string->cp1255) ;; TODO -- error check
(define-unibyte-mapper cp1256->code-mapper code->cp1256-mapper
(#x80 #x20AC) ; EURO SIGN
@@ -861,10 +855,7 @@
(instantiate-octets-definition define-cp1256->string)
-(add-external-format-funs '(:cp1256 :|cp1256| :windows-1256 :|windows-1256|)
- '(cp1256->string-aref string->cp1256))
-
-(define-external-format (:cp1256 :|cp1256|)
+(define-external-format (:cp1256 :|cp1256| :windows-1256 :|windows-1256|)
1 t
(let ((cp1256-byte (code->cp1256-mapper bits)))
(if cp1256-byte
@@ -873,7 +864,9 @@
(let ((code (cp1256->code-mapper byte)))
(if code
(code-char code)
- (external-format-decoding-error stream byte)))) ;; TODO -- error check
+ (external-format-decoding-error stream byte)))
+ cp1256->string-aref
+ string->cp1256) ;; TODO -- error check
(define-unibyte-mapper cp1257->code-mapper code->cp1257-mapper
(#x80 #x20AC) ; EURO SIGN
@@ -994,9 +987,6 @@
(instantiate-octets-definition define-cp1257->string)
-(add-external-format-funs '(:cp1257 :|cp1257| :windows-1257 :|windows-1257|)
- '(cp1257->string-aref string->cp1257))
-
(define-external-format (:cp1257 :|cp1257| :windows-1257 :|windows-1257|)
1 t
(let ((cp1257-byte (code->cp1257-mapper bits)))
@@ -1006,7 +996,9 @@
(let ((code (cp1257->code-mapper byte)))
(if code
(code-char code)
- (external-format-decoding-error stream byte)))) ;; TODO -- error check
+ (external-format-decoding-error stream byte)))
+ cp1257->string-aref
+ string->cp1257) ;; TODO -- error check
(define-unibyte-mapper cp1258->code-mapper code->cp1258-mapper
(#x80 #x20AC) ; EURO SIGN
@@ -1086,9 +1078,6 @@
(instantiate-octets-definition define-cp1258->string)
-(add-external-format-funs '(:cp1258 :|cp1258| :windows-1258 :|windows-1258|)
- '(cp1258->string-aref string->cp1258))
-
(define-external-format (:cp1258 :|cp1258| :windows-1258 :|windows-1258|)
1 t
(let ((cp1258-byte (code->cp1258-mapper bits)))
@@ -1098,4 +1087,7 @@
(let ((code (cp1258->code-mapper byte)))
(if code
(code-char code)
- (external-format-decoding-error stream byte)))) ;; TODO -- error check
+ (external-format-decoding-error stream byte)))
+ cp1258->string-aref
+ string->cp1258) ;; TODO -- error check
+
Index: mb-util.lisp
===================================================================
RCS file: /cvsroot/sbcl/sbcl/src/code/external-formats/mb-util.lisp,v
retrieving revision 1.3
retrieving revision 1.4
diff -u -d -r1.3 -r1.4
--- mb-util.lisp 10 Sep 2007 13:31:46 -0000 1.3
+++ mb-util.lisp 29 Sep 2009 01:02:20 -0000 1.4
@@ -183,38 +183,6 @@
(define-mb->string
(make-od-name-list 'define format '>string)))
`(progn
- ;; for fd-stream.lisp
- (define-external-format/variable-width ,aliases t
- (mb-char-len (or (,ucs-to-mb (char-code byte)) -1))
- (let ((mb (,ucs-to-mb bits)))
- (if (null mb)
- (external-format-encoding-error stream byte)
- (ecase size
- (1 (setf (sap-ref-8 sap tail) mb))
- (2 (setf (sap-ref-8 sap tail) (ldb (byte 8 8) mb)
- (sap-ref-8 sap (1+ tail)) (ldb (byte 8 0) mb)))
- (3 (setf (sap-ref-8 sap tail) (ldb (byte 8 16) mb)
- (sap-ref-8 sap (1+ tail)) (ldb (byte 8 8) mb)
- (sap-ref-8 sap (+ 2 tail)) (ldb (byte 8 0) mb))))))
- (,mb-len byte)
- (let* ((mb (ecase size
- (1 byte)
- (2 (let ((byte2 (sap-ref-8 sap (1+ head))))
- (unless (,mb-continuation-byte-p byte2)
- (return-from decode-break-reason 2))
- (dpb byte (byte 8 8) byte2)))
- (3 (let ((byte2 (sap-ref-8 sap (1+ head)))
- (byte3 (sap-ref-8 sap (+ 2 head))))
- (unless (,mb-continuation-byte-p byte2)
- (return-from decode-break-reason 2))
- (unless (,mb-continuation-byte-p byte3)
- (return-from decode-break-reason 3))
- (dpb byte (byte 8 16) (dpb byte2 (byte 8 8) byte3))))))
- (ucs (,mb-to-ucs mb)))
- (if (null ucs)
- (return-from decode-break-reason 1)
- (code-char ucs))))
-
;; for octets.lisp
(define-condition ,(make-od-name 'malformed format)
(octet-decoding-error) ())
@@ -276,7 +244,36 @@
(instantiate-octets-definition ,define-mb->string)
- (add-external-format-funs ',aliases
- '(,(make-od-name format '>string-aref)
- ,string->mb))
- )))
+ ;; for fd-stream.lisp
+ (define-external-format/variable-width ,aliases t
+ (mb-char-len (or (,ucs-to-mb (char-code byte)) -1))
+ (let ((mb (,ucs-to-mb bits)))
+ (if (null mb)
+ (external-format-encoding-error stream byte)
+ (ecase size
+ (1 (setf (sap-ref-8 sap tail) mb))
+ (2 (setf (sap-ref-8 sap tail) (ldb (byte 8 8) mb)
+ (sap-ref-8 sap (1+ tail)) (ldb (byte 8 0) mb)))
+ (3 (setf (sap-ref-8 sap tail) (ldb (byte 8 16) mb)
+ (sap-ref-8 sap (1+ tail)) (ldb (byte 8 8) mb)
+ (sap-ref-8 sap (+ 2 tail)) (ldb (byte 8 0) mb))))))
+ (,mb-len byte)
+ (let* ((mb (ecase size
+ (1 byte)
+ (2 (let ((byte2 (sap-ref-8 sap (1+ head))))
+ (unless (,mb-continuation-byte-p byte2)
+ (return-from decode-break-reason 2))
+ (dpb byte (byte 8 8) byte2)))
+ (3 (let ((byte2 (sap-ref-8 sap (1+ head)))
+ (byte3 (sap-ref-8 sap (+ 2 head))))
+ (unless (,mb-continuation-byte-p byte2)
+ (return-from decode-break-reason 2))
+ (unless (,mb-continuation-byte-p byte3)
+ (return-from decode-break-reason 3))
+ (dpb byte (byte 8 16) (dpb byte2 (byte 8 8) byte3))))))
+ (ucs (,mb-to-ucs mb)))
+ (if (null ucs)
+ (return-from decode-break-reason 1)
+ (code-char ucs)))
+ ,(make-od-name format '>string-aref)
+ ,string->mb))))
Index: ucs-2.lisp
===================================================================
RCS file: /cvsroot/sbcl/sbcl/src/code/external-formats/ucs-2.lisp,v
retrieving revision 1.3
retrieving revision 1.4
diff -u -d -r1.3 -r1.4
--- ucs-2.lisp 6 May 2007 02:28:43 -0000 1.3
+++ ucs-2.lisp 29 Sep 2009 01:02:20 -0000 1.4
@@ -34,26 +34,6 @@
(sap-ref-8 sap offset) (ldb (byte 8 8) value)))
;;;
-;;; Define external format: fd-stream
-;;;
-(define-external-format/variable-width (:ucs-2le :ucs2le #!+win32 :ucs2 #!+win32 :ucs-2) nil
- 2
- (if (< bits #x10000)
- (setf (sap-ref-16le sap tail) bits)
- (external-format-encoding-error stream bits))
- 2
- (code-char (sap-ref-16le sap head)))
-
-(define-external-format/variable-width (:ucs-2be :ucs2be) nil
- 2
- (if (< bits #x10000)
- (setf (sap-ref-16be sap tail) bits)
- (external-format-encoding-error stream bits))
- 2
- (code-char (sap-ref-16be sap head)))
-
-
-;;;
;;; octets
;;;
@@ -209,8 +189,22 @@
(instantiate-octets-definition define-ucs-2->string)
-(add-external-format-funs '(:ucs-2le :ucs2le #!+win32 :ucs2 #!+win32 :ucs-2)
- '(ucs-2le->string-aref string->ucs-2le))
+(define-external-format/variable-width (:ucs-2le :ucs2le #!+win32 :ucs2 #!+win32 :ucs-2) nil
+ 2
+ (if (< bits #x10000)
+ (setf (sap-ref-16le sap tail) bits)
+ (external-format-encoding-error stream bits))
+ 2
+ (code-char (sap-ref-16le sap head))
+ ucs-2le->string-aref
+ string->ucs-2le)
-(add-external-format-funs '(:ucs-2be :ucs2be)
- '(ucs-2be->string-aref string->ucs-2be))
+(define-external-format/variable-width (:ucs-2be :ucs2be) nil
+ 2
+ (if (< bits #x10000)
+ (setf (sap-ref-16be sap tail) bits)
+ (external-format-encoding-error stream bits))
+ 2
+ (code-char (sap-ref-16be sap head))
+ ucs-2be->string-aref
+ string->ucs-2be)
|