Commit [0d5ff0] Maximize Restore History

0.9.18.45

"Patch to update Japanese external formats" (NIIMI Satoshi, 2006-11-10)
* New external format: Shift-JIS

Rudi Schlatte Rudi Schlatte 2006-11-12

added src/code/external-formats/enc-jpn-tbl.lisp
added src/code/external-formats/mb-util.lisp
added tests/enc-jpn.impure.lisp
removed src/code/external-formats/eucjp.lisp
removed tests/eucjp-test.lisp-expr
changed src/code/win32.lisp
changed NEWS
changed build-order.lisp-expr
changed version.lisp-expr
copied tests/eucjp.impure.lisp -> src/code/external-formats/enc-jpn.lisp
src/code/external-formats/enc-jpn-tbl.lisp Diff Switch to side-by-side view
Loading...
src/code/external-formats/mb-util.lisp Diff Switch to side-by-side view
Loading...
tests/enc-jpn.impure.lisp Diff Switch to side-by-side view
Loading...
tests/eucjp-test.lisp-expr
File was removed.
src/code/win32.lisp Diff Switch to side-by-side view
Loading...
NEWS Diff Switch to side-by-side view
Loading...
build-order.lisp-expr Diff Switch to side-by-side view
Loading...
version.lisp-expr Diff Switch to side-by-side view
Loading...
tests/eucjp.impure.lisp to src/code/external-formats/enc-jpn.lisp
--- a/tests/eucjp.impure.lisp
+++ b/src/code/external-formats/enc-jpn.lisp
@@ -1,86 +1,62 @@
-#-sb-unicode
-(sb-ext:quit :unix-status 104)
+(in-package "SB!IMPL")
 
-(let ((p "eucjp-test.data")
-      (eucjp "eucjp-test-eucjp.data")
-      (utf8 "eucjp-test-utf8.data"))
+;;; EUC-JP
+(declaim (inline ucs-to-eucjp eucjp-to-ucs
+                 mb-len-as-eucjp eucjp-continuation-byte-p))
 
-  ;; generate test data
-  (with-open-file (in "eucjp-test.lisp-expr" :direction :input)
-    (with-open-file (out-eucjp eucjp :direction :output
-                               :element-type '(unsigned-byte 8)
-                               :if-exists :supersede)
-      (with-open-file (out-utf8 utf8 :direction :output
-                                :external-format :utf-8
-                                :if-exists :supersede)
-        (do ((euc (read in nil) (read in nil))
-             (ucs (read in nil) (read in nil))
-             (i 0 (1+ i)))
-            ((or (null euc) (null ucs)))
-          ;; write EUC-JP data as binary
-          (let ((out out-eucjp))
-            (when (>= euc #x10000)
-              (write-byte (ldb (byte 8 16) euc) out))
-            (when (>= euc #x100)
-              (write-byte (ldb (byte 8 8) euc) out))
-            (write-byte (ldb (byte 8 0) euc) out)
-            (when (= (mod i 32) 31)
-              (write-byte #x0a out)))
-          ;; trust UTF-8 external format
-          (let ((out out-utf8))
-            (write-char (code-char ucs) out)
-            (when (= (mod i 32) 31)
-              (write-char (code-char #x0a) out)))))))
+(defun ucs-to-eucjp (code)
+  (declare (optimize speed (safety 0))
+           (type fixnum code))
+  (if (<= code #x7F) code
+      (get-multibyte-mapper *ucs-to-eucjp-table* code)))
 
-  ;; check if input works
-  (with-open-file (in1 eucjp :direction :input
-                       :external-format :euc-jp)
-    (with-open-file (in2 utf8 :direction :input
-                         :external-format :utf-8)
-      (do ((c1 (read-char in1 nil) (read-char in1 nil))
-           (c2 (read-char in2 nil) (read-char in2 nil)))
-          ((and (null c1) (null c2)))
-        (assert (eql c1 c2)))))
+(defun eucjp-to-ucs (code)
+  (declare (optimize speed (safety 0))
+           (type fixnum code))
+  (if (<= code #x7F) code
+      (get-multibyte-mapper *eucjp-to-ucs-table* code)))
 
-  ;; check if output works
-  (with-open-file (in utf8 :direction :input
-                      :external-format :utf-8)
-    (with-open-file (out p :direction :output
-                         :external-format :euc-jp
-                         :if-exists :supersede)
-      (do ((c (read-char in nil) (read-char in nil)))
-          ((null c))
-        (write-char c out))))
-  (with-open-file (in1 eucjp :direction :input
-                       :element-type '(unsigned-byte 8))
-    (with-open-file (in2 p :direction :input
-                         :element-type '(unsigned-byte 8))
-      (do ((b1 (read-byte in1 nil) (read-byte in1 nil))
-           (b2 (read-byte in2 nil) (read-byte in2 nil)))
-          ((and (null b1) (null b2)))
-        (assert (eql b1 b2)))))
-  (delete-file p)
-  (delete-file eucjp)
-  (delete-file utf8))
+(defun mb-len-as-eucjp (code)
+  (declare (optimize speed (safety 0))
+           (type (unsigned-byte 8) code))
+  (cond ((< code #x80) 1)
+        ((or (= code #x8E) (<= #xA1 code #xFE)) 2)
+        ((= code #x8F) 3)))
 
-;; check if string conversion works
-(with-open-file (in "eucjp-test.lisp-expr" :direction :input)
-  (do ((euc (read in nil) (read in nil))
-       (ucs (read in nil) (read in nil))
-       (i 0 (1+ i)))
-      ((or (null euc) (null ucs)))
-    (let ((o (coerce (cond ((>= euc #x10000)
-                            (list (ldb (byte 8 16) euc)
-                                  (ldb (byte 8 8) euc)
-                                  (ldb (byte 8 0) euc)))
-                           ((>= euc #x100)
-                            (list (ldb (byte 8 8) euc)
-                                  (ldb (byte 8 0) euc)))
-                           (t (list euc)))
-                     '(vector (unsigned-byte 8))))
-          (s (string (code-char ucs))))
-      (assert (equal (octets-to-string o :external-format :euc-jp) s))
-      (assert (equal (coerce (string-to-octets s :external-format :euc-jp)
-                             'list)
-                     (coerce o 'list))))))
-;;; success
+(defun eucjp-continuation-byte-p (code)
+  (declare (optimize speed (safety 0))
+           (type (unsigned-byte 8) code))
+  (<= #xA1 code #xFE))
+
+(define-multibyte-encoding :euc-jp (:euc-jp :eucjp :|eucJP|)
+  ucs-to-eucjp eucjp-to-ucs mb-len-as-eucjp eucjp-continuation-byte-p)
+
+;;; Shift_JIS
+(declaim (inline ucs-to-sjis sjis-to-ucs
+                 mb-len-as-sjis sjis-continuation-byte-p))
+
+(defun ucs-to-sjis (code)
+  (declare (optimize speed (safety 0))
+           (type fixnum code))
+  (if (<= code #x7F) code
+      (get-multibyte-mapper *ucs-to-sjis-table* code)))
+
+(defun sjis-to-ucs (code)
+  (declare (optimize speed (safety 0))
+           (type fixnum code))
+  (if (<= code #x7F) code
+      (get-multibyte-mapper *sjis-to-ucs-table* code)))
+
+(defun mb-len-as-sjis (code)
+  (declare (optimize speed (safety 0))
+           (type (unsigned-byte 8) code))
+  (cond ((or (< code #x80) (<= #xA1 code #xDF)) 1)
+        ((or (<= #x81 code #x9F) (<= #xE0 code #xFC)) 2)))
+
+(defun sjis-continuation-byte-p (code)
+  (declare (optimize speed (safety 0))
+           (type (unsigned-byte 8) code))
+  (or (<= #x40 code #x7E) (<= #x80 code #xFC)))
+
+(define-multibyte-encoding :shift_jis (:shift_jis :sjis :|Shift_JIS| :cp932)
+  ucs-to-sjis sjis-to-ucs mb-len-as-sjis sjis-continuation-byte-p)