[Substrate-commits] SF.net SVN: substrate: [268] trunk
Brought to you by:
landonf
|
From: <la...@us...> - 2006-08-31 00:02:40
|
Revision: 268
http://svn.sourceforge.net/substrate/?rev=268&view=rev
Author: landonf
Date: 2006-08-30 17:02:26 -0700 (Wed, 30 Aug 2006)
Log Message:
-----------
Implement support for NSString's dataUsingEncoding:(NSStringEncoding)encoding allowLossyConversion:(BOOL)lossy, and modify the NSUnicodeString tests to use it.
allowLossyConversion: NO is still unimplemented.
Modified Paths:
--------------
trunk/Foundation/NSString.h
trunk/Foundation/NSString.m
trunk/Foundation/NSUnicodeString.m
trunk/Tests/NSString.m
trunk/Tests/NSUnicodeString.m
Property Changed:
----------------
trunk/
Property changes on: trunk
___________________________________________________________________
Name: svk:merge
- 11572a18-12fc-0310-9209-f8edcc8181a7:/local/substrate/trunk:5682
+ 11572a18-12fc-0310-9209-f8edcc8181a7:/local/substrate/trunk:5684
Modified: trunk/Foundation/NSString.h
===================================================================
--- trunk/Foundation/NSString.h 2006-08-30 23:20:17 UTC (rev 267)
+++ trunk/Foundation/NSString.h 2006-08-31 00:02:26 UTC (rev 268)
@@ -39,6 +39,8 @@
#include <Foundation/NSObject.h>
#include <Foundation/NSRange.h>
+@class NSData;
+
/*!
* @ingroup NSString
* @{
@@ -117,13 +119,15 @@
- (const char *) cString;
/* Initialization */
-+ string;
-- initWithCString:(const char *)cStr encoding:(NSStringEncoding)encoding;
++ (id) string;
++ (id) stringWithCString:(const char *)cStr encoding:(NSStringEncoding)encoding;
+- (id) initWithCString:(const char *)cStr encoding:(NSStringEncoding)encoding;
- (id) initWithBytes:(const void *)bytes length:(unsigned int)len encoding:(NSStringEncoding)encoding;
- (id) initWithBytesNoCopy:(void *)bytes length:(unsigned int)len encoding:(NSStringEncoding)encoding freeWhenDone:(BOOL) shouldFree;
/* Encoding */
+ (NSStringEncoding) defaultCStringEncoding;
+- (NSData *) dataUsingEncoding:(NSStringEncoding)encoding allowLossyConversion:(BOOL)lossy;
/* Deprecated */
+ stringWithCString:(const char *)cStr;
Modified: trunk/Foundation/NSString.m
===================================================================
--- trunk/Foundation/NSString.m 2006-08-30 23:20:17 UTC (rev 267)
+++ trunk/Foundation/NSString.m 2006-08-31 00:02:26 UTC (rev 268)
@@ -94,6 +94,38 @@
return NSUTF8StringEncoding;
}
+/*!
+ * Encode the receiver in the requested string encoding and hand the
+ * encoded bytes back wrapped in an NSData instance.
+ * If allowLossyConversion is NO, and the encoding conversion can not be done
+ * without data loss, nil will be returned.
+ * @param encoding Encoding to convert receiver to.
+ * @param lossy If YES, permits lossy conversion.
+ * @return NSData object containing the converted string data, or nil on failure.
+ */
+- (NSData *) dataUsingEncoding:(NSStringEncoding)encoding allowLossyConversion:(BOOL)lossy {
+    /*
+     * Inefficient superclass implementation.
+     * The receiver's UTF-16 characters are copied into a freshly allocated
+     * buffer, the buffer is handed (without a second copy) to a temporary
+     * NSUnicodeString, and that object performs the actual conversion.
+     */
+    unsigned int charCount = [self length];
+    unichar *utf16 = NSZoneMalloc(NULL, charCount * sizeof(unichar));
+    NSUnicodeString *converter;
+    NSData *encoded;
+
+    [self getCharacters: utf16];
+
+    /* The initializer expects a length in bytes rather than in characters;
+     * freeWhenDone: YES passes ownership of the buffer to the temporary string. */
+    converter = [[NSUnicodeString alloc] initWithBytesNoCopy: utf16
+                                                      length: charCount * sizeof(unichar)
+                                                    encoding: NSUnicodeStringEncoding
+                                                freeWhenDone: YES];
+    encoded = [converter dataUsingEncoding: encoding allowLossyConversion: lossy];
+    [converter release];
+
+    return encoded;
+}
+
/*! @} */
@@ -103,19 +135,20 @@
*/
/*!
- * Allocate and initialize an NSString instance with the supplied C string.
+ * Allocate and initialize an NSString instance with the supplied C string, of the specified encoding.
* @param cStr NULL-terminated C string.
+ * @param encoding Character encoding for the C String
* @return Allocated and initialized NSString instance.
*/
-+ stringWithCString:(const char *)cStr {
- return [[[self alloc] initWithCString:cStr] autorelease];
++ (id) stringWithCString:(const char *)cStr encoding:(NSStringEncoding)encoding {
+    /* Allocate through self so subclasses get an instance of their own class. */
+    id instance = [[self alloc] initWithCString: cStr encoding: encoding];
+
+    return [instance autorelease];
}
/*!
* Allocate and initialize an empty NSString instance.
* @return New empty NSString instance.
*/
-+ string {
++ (id) string {
[self release]; /* NOTE(review): inside a class method self is the class object, so this releases the class itself -- confirm this is intended */
return [[[NSUnicodeString alloc] init] autorelease]; /* always an NSUnicodeString, regardless of the class the message was sent to */
}
@@ -269,7 +302,18 @@
*/
/*!
- * Initialize the receiver with the supplied C string, using the default string encoding.
+ * Allocate and initialize an NSString instance with the supplied C string, using the default C string encoding.
+ * @deprecated Use stringWithCString:encoding: instead.
+ * @param cStr NULL-terminated C string.
+ * @return Allocated and initialized NSString instance.
+ */
++ stringWithCString:(const char *)cStr {
+    /* Deprecated entry point: resolve the default C string encoding and
+     * forward to the encoding-aware factory method. */
+    NSStringEncoding defaultEncoding = [self defaultCStringEncoding];
+
+    return [self stringWithCString: cStr encoding: defaultEncoding];
+}
+
+
+/*!
+ * Initialize the receiver with the supplied C string, using the default C string encoding.
* @deprecated Callers should make use of initWithCString:encoding: instead.
* @param cStr NULL-terminated C string.
* @return Allocated and initialized NSString instance.
Modified: trunk/Foundation/NSUnicodeString.m
===================================================================
--- trunk/Foundation/NSUnicodeString.m 2006-08-30 23:20:17 UTC (rev 267)
+++ trunk/Foundation/NSUnicodeString.m 2006-08-31 00:02:26 UTC (rev 268)
@@ -34,6 +34,7 @@
#endif
#include <Foundation/NSUnicodeString.h>
+#include <Foundation/NSData.h>
#include <Foundation/NSException.h>
@@ -250,6 +251,7 @@
continue;
} else {
[NSException raise: NSInternalInconsistencyException format: @"Failure in IBM ICU's ucnv_toUChars()"];
+ NSZoneFree(NULL, _string);
ucnv_close(conv);
[self release];
return nil;
@@ -352,6 +354,168 @@
/*! @} */
+/*!
+ * @name Encoding
+ * @{
+ */
+
+/*! Convert the receiver's UTF-16 contents to the specified encoding.
+ *
+ * The NSStringEncoding is mapped onto an ICU converter name, the converter
+ * is opened, and ucnv_fromUChars() is called until the destination buffer is
+ * large enough to hold the result. The converter is closed on every exit
+ * path and the destination buffer is released on failure.
+ *
+ * @param encoding Encoding to convert the receiver to.
+ * @param lossy Currently ignored; the converter's default behaviour is used.
+ * @return Autoreleased NSData instance that owns the converted bytes.
+ * @exception NSInvalidArgumentException Raised for an unsupported encoding.
+ * @exception NSInternalInconsistencyException Raised when ICU fails to open
+ * the converter or to perform the conversion.
+ * @todo Need better heuristics for determining destination buffer length, and
+ * for deciding when we should call realloc.
+ * @todo Honour allowLossyConversion: NO.
+ */
+- (NSData *) dataUsingEncoding:(NSStringEncoding)encoding allowLossyConversion:(BOOL)lossy {
+    const char *converterName;
+    UConverter *conv;
+    UErrorCode err = U_ZERO_ERROR;
+    void *bytes;
+    size_t allocSize;
+
+    /* Map the requested encoding onto the name of the matching ICU converter */
+    switch (encoding) {
+        /* 7-bit ASCII encoding, using 8-bit characters. */
+        case NSASCIIStringEncoding:
+            converterName = "US-ASCII";
+            break;
+
+        /* 8-bit EUC encoding for Japanese. */
+        case NSJapaneseEUCStringEncoding:
+            converterName = "EUC-JP";
+            break;
+
+        /* 8-bit Unicode encoding (UTF-8). */
+        case NSUTF8StringEncoding:
+            converterName = "UTF-8";
+            break;
+
+        /* 8-bit ISO Latin 1 Encoding. */
+        case NSISOLatin1StringEncoding:
+            converterName = "latin1";
+            break;
+
+        /* 7-bit non-lossy Unicode encoding (UTF-7). */
+        case NSNonLossyASCIIStringEncoding:
+            converterName = "UTF-7";
+            break;
+
+        /* 8-bit Japanese Shift_JIS Encoding. */
+        case NSShiftJISStringEncoding:
+            converterName = "Shift_JIS";
+            break;
+
+        /* 8-bit ISO Latin 2 Encoding. */
+        case NSISOLatin2StringEncoding:
+            converterName = "latin2";
+            break;
+
+        case NSProprietaryStringEncoding: /* Fall through to UTF-16 */
+        /* Canonical encoding for NSString objects (UTF-16). */
+        case NSUnicodeStringEncoding:
+            /* UTF-16-LE is the canonical byte ordering for UTF-16 strings in
+             * both Substrate and Apple's Foundation */
+            converterName = "UTF-16-LE";
+            break;
+
+        /* Microsoft Windows codepage 1251 for Cyrillic characters. */
+        case NSWindowsCP1251StringEncoding:
+            converterName = "windows-1251";
+            break;
+
+        /* Microsoft Windows codepage 1252 (WinLatin1). */
+        case NSWindowsCP1252StringEncoding:
+            converterName = "windows-1252";
+            break;
+
+        /* Microsoft Windows codepage 1253, for Greek characters. */
+        case NSWindowsCP1253StringEncoding:
+            converterName = "windows-1253";
+            break;
+
+        /* Microsoft Windows codepage 1254, for Turkish characters. */
+        case NSWindowsCP1254StringEncoding:
+            converterName = "windows-1254";
+            break;
+
+        /* Microsoft Windows codepage 1250 (WinLatin2). */
+        case NSWindowsCP1250StringEncoding:
+            converterName = "windows-1250";
+            break;
+
+        /* ISO 2022 Japanese for e-mail. */
+        case NSISO2022JPStringEncoding:
+            converterName = "ISO-2022-JP";
+            break;
+
+        /* Legacy Mac Roman Encoding. */
+        case NSMacOSRomanStringEncoding:
+            converterName = "macintosh";
+            break;
+
+        /* Unhandled encodings */
+        case NSNEXTSTEPStringEncoding:
+        case NSSymbolStringEncoding:
+        default:
+            [NSException raise: NSInvalidArgumentException format: @"Unknown NSStringEncoding"];
+            return nil;
+    }
+
+    /* Acquire the converter. Nothing has been allocated yet, so a failure
+     * here needs no cleanup. The receiver is NOT released: this is not an
+     * initializer, and the caller still owns its reference. */
+    conv = ucnv_open(converterName, &err);
+    if (U_FAILURE(err)) {
+        [NSException raise: NSInternalInconsistencyException format: @"Failure in IBM ICU's ucnv_open()"];
+        return nil;
+    }
+
+    /* Educated guess: one output byte per UTF-16 code unit. The loop below
+     * grows the buffer when ICU reports that the guess was too small. */
+    allocSize = _length;
+    bytes = NSZoneMalloc(NULL, allocSize);
+
+    /* Do the conversion */
+    while (1) {
+        int32_t required;
+
+        err = U_ZERO_ERROR;
+        required = ucnv_fromUChars(conv, bytes, (int32_t) allocSize, _string, _length, &err);
+
+        if (U_SUCCESS(err)) {
+            /* Don't bother calling realloc for very small differences.
+             * The operands are ordered so the unsigned subtraction can not wrap. */
+            if ((size_t) required < allocSize && allocSize - (size_t) required >= 5)
+                bytes = NSZoneRealloc(NULL, bytes, (size_t) required);
+            allocSize = (size_t) required;
+            break;
+        }
+
+        if (err != U_BUFFER_OVERFLOW_ERROR) {
+            /* Release everything BEFORE raising; -raise:format: does not return */
+            NSZoneFree(NULL, bytes);
+            ucnv_close(conv);
+            [NSException raise: NSInternalInconsistencyException format: @"Failure in IBM ICU's ucnv_fromUChars()"];
+            return nil;
+        }
+
+        /* Buffer too small: ICU told us the required size, grow and retry */
+        allocSize = (size_t) required;
+        bytes = NSZoneRealloc(NULL, bytes, allocSize);
+    }
+
+    /* The converter is no longer needed once the bytes have been produced */
+    ucnv_close(conv);
+
+    /* Success. Let's shove it into an NSData instance, which takes ownership of the buffer */
+    return [NSData dataWithBytesNoCopy: bytes length: allocSize freeWhenDone: YES];
+}
+
+
+
+/*! @} */
+
@end
/*! @} */
Modified: trunk/Tests/NSString.m
===================================================================
--- trunk/Tests/NSString.m 2006-08-30 23:20:17 UTC (rev 267)
+++ trunk/Tests/NSString.m 2006-08-31 00:02:26 UTC (rev 268)
@@ -34,6 +34,7 @@
#endif
#include <Foundation/NSString.h>
+#include <Foundation/NSData.h>
#include <Foundation/NSException.h>
#include <Foundation/NSAutoreleasePool.h>
#include "LFObjCRuntime.h"
@@ -48,6 +49,11 @@
@interface NSDumbString : NSString
@end
@implementation NSDumbString
+
+- (id) initWithBytes:(const void *)bytes length:(unsigned int) length encoding:(NSStringEncoding)encoding { /* test stub: none of the three arguments is read */
+ return self; /* hands back the receiver unchanged */
+}
+
- (unichar) characterAtIndex:(unsigned int) index {
return (unichar) TEST_STRING[index];
}
@@ -110,6 +116,14 @@
}
END_TEST
+/* Verify that the generic NSString implementation of
+ * dataUsingEncoding:allowLossyConversion: round-trips an ASCII string:
+ * the result must be non-nil, have the byte length of TEST_STRING without
+ * its terminating NUL, and contain exactly the bytes of TEST_STRING.
+ * Assumes TEST_STRING is a string literal / char array, so that sizeof
+ * yields its length including the terminator. */
+START_TEST (test_dataUsingEncodingAllowLossyConversion) {
+    const size_t expectedLength = sizeof(TEST_STRING) - 1;
+    NSString *string = [NSDumbString stringWithCString: TEST_STRING encoding: NSASCIIStringEncoding];
+    NSData *data = [string dataUsingEncoding: NSASCIIStringEncoding allowLossyConversion: NO];
+
+    fail_if(data == nil, "-[NSString dataUsingEncoding: NSASCIIStringEncoding allowLossyConversion: NO] returned nil.");
+    fail_unless([data length] == expectedLength, "-[NSData length] returned unexpected byte length (Expected %u, got %u)", (unsigned int) expectedLength, (unsigned int) [data length]);
+    /* NSData bytes are not NUL-terminated, so compare raw memory over the full expected length */
+    fail_unless(memcmp([data bytes], TEST_STRING, expectedLength) == 0, "-[NSString dataUsingEncoding: NSASCIIStringEncoding allowLossyConversion: NO] returned unexpected data.");
+}
+END_TEST
/* Initialization */
@@ -176,6 +190,7 @@
tcase_add_checked_fixture(tc_encoding, setUp, tearDown);
suite_add_tcase(s, tc_encoding);
tcase_add_test(tc_encoding, test_cls_defaultCStringEncoding);
+ tcase_add_test(tc_encoding, test_dataUsingEncodingAllowLossyConversion);
TCase *tc_init = tcase_create("Initialization");
tcase_add_checked_fixture(tc_init, setUp, tearDown);
Modified: trunk/Tests/NSUnicodeString.m
===================================================================
--- trunk/Tests/NSUnicodeString.m 2006-08-30 23:20:17 UTC (rev 267)
+++ trunk/Tests/NSUnicodeString.m 2006-08-31 00:02:26 UTC (rev 268)
@@ -28,6 +28,7 @@
#include <config.h>
#endif
+#include <Foundation/NSData.h>
#include <Foundation/NSString.h>
#include <Foundation/NSAutoreleasePool.h>
@@ -88,20 +89,22 @@
0xb9, 0x74, 0x69, 0x6e, 0x79
};
+/* English UTF-16-LE "Hello, Universe", assembled by byte
+ * UTF-16-LE is the canonical UTF-16 string encoding for both Substrate
+ * and Apple's foundation. */
+static const unsigned char NSUnicodeStringEncoding_data[] = {
+ 0xff, 0xfe, 0x48, 0x00, 0x65, 0x00, 0x6c, 0x00, 0x6c, 0x00, 0x6f, 0x00,
+ 0x2c, 0x00, 0x20, 0x00, 0x55, 0x00, 0x6e, 0x00, 0x69, 0x00, 0x76, 0x00,
+ 0x65, 0x00, 0x72, 0x00, 0x73, 0x00, 0x65, 0x00, 0x0, 0x0
+};
+
/* English UTF-16-BE "Hello, Universe", assembled by byte */
-static const unsigned char NSUnicodeStringEncoding_data[] = {
+static const unsigned char NSUnicodeStringEncodingBE_data[] = {
0xfe, 0xff, 0x00, 0x48, 0x00, 0x65, 0x00, 0x6c, 0x00, 0x6c, 0x00, 0x6f,
0x00, 0x2c, 0x00, 0x20, 0x00, 0x55, 0x00, 0x6e, 0x00, 0x69, 0x00, 0x76,
0x00, 0x65, 0x00, 0x72, 0x00, 0x73, 0x00, 0x65, 0x0, 0x0
};
-/* English UTF-16-LE "Hello, Universe", assembled by byte */
-static const unsigned char NSUnicodeStringEncodingLE_data[] = {
- 0xff, 0xfe, 0x48, 0x00, 0x65, 0x00, 0x6c, 0x00, 0x6c, 0x00, 0x6f, 0x00,
- 0x2c, 0x00, 0x20, 0x00, 0x55, 0x00, 0x6e, 0x00, 0x69, 0x00, 0x76, 0x00,
- 0x65, 0x00, 0x72, 0x00, 0x73, 0x00, 0x65, 0x00, 0x0, 0x0
-};
-
/* Russian */
static const unsigned char NSWindowsCP1251StringEncoding_data[] = {
0xc0, 0xe2, 0xf2, 0xee, 0xec, 0xe0, 0xf2, 0xe8, 0xe7, 0xe0, 0xf6,
@@ -163,8 +166,12 @@
START_TEST(test_ ## stringEncoding) { \
NSAutoreleasePool *pool = [[NSAutoreleasePool alloc] init]; \
NSString *string; \
+ NSData *data; \
string = [[NSString alloc] initWithBytes: stringEncoding ## _data length: sizeof(stringEncoding ## _data) encoding: stringEncoding]; \
fail_if(string == nil, "-[NSString initWithBytes: length: encoding:] return nil for NSStringEncoding %d.", stringEncoding); \
+ data = [string dataUsingEncoding: stringEncoding allowLossyConversion: NO]; \
+ fail_if(data == nil, "-[NSString dataUsingEncoding: allowLossyConversion:] return nil for NSStringEncoding %d.", stringEncoding); \
+ fail_unless(memcmp([data bytes], stringEncoding ## _data, sizeof(stringEncoding ## _data)) == 0, "-[NSString dataUsingEncoding: allowLossyConversion:] return data that does not match the original for NSStringEncoding %d.", stringEncoding); \
[pool release]; \
[string release]; \
} \
@@ -191,26 +198,26 @@
void *bytes;
/* No free, UTF-16-BE */
- string = [[NSString alloc] initWithBytesNoCopy: (void *) NSUnicodeStringEncoding_data length: sizeof(NSUnicodeStringEncoding_data) encoding: NSUnicodeStringEncoding freeWhenDone: NO];
+ string = [[NSString alloc] initWithBytesNoCopy: (void *) NSUnicodeStringEncodingBE_data length: sizeof(NSUnicodeStringEncodingBE_data) encoding: NSUnicodeStringEncoding freeWhenDone: NO];
fail_if(string == nil, "-[NSString initWithBytesNoCopy: length: encoding: freeWhenDone: NO] returned nil with a UTF-16-BE encoded string.");
[string release];
/* No free, UTF-16-LE */
- string = [[NSString alloc] initWithBytesNoCopy: (void *) NSUnicodeStringEncodingLE_data length: sizeof(NSUnicodeStringEncodingLE_data) encoding: NSUnicodeStringEncoding freeWhenDone: NO];
+ string = [[NSString alloc] initWithBytesNoCopy: (void *) NSUnicodeStringEncoding_data length: sizeof(NSUnicodeStringEncoding_data) encoding: NSUnicodeStringEncoding freeWhenDone: NO];
fail_if(string == nil, "-[NSString initWithBytesNoCopy: length: encoding: freeWhenDone: NO] returned nil with a UTF-16-LE encoded string.");
[string release];
/* Free, UTF-16-BE */
- bytes = malloc(sizeof(NSUnicodeStringEncoding_data));
- memcpy(bytes, NSUnicodeStringEncoding_data, sizeof(NSUnicodeStringEncoding_data));
- string = [[NSString alloc] initWithBytesNoCopy: bytes length: sizeof(NSUnicodeStringEncoding_data) encoding: NSUnicodeStringEncoding freeWhenDone: YES];
+ bytes = malloc(sizeof(NSUnicodeStringEncodingBE_data));
+ memcpy(bytes, NSUnicodeStringEncodingBE_data, sizeof(NSUnicodeStringEncodingBE_data));
+ string = [[NSString alloc] initWithBytesNoCopy: bytes length: sizeof(NSUnicodeStringEncodingBE_data) encoding: NSUnicodeStringEncoding freeWhenDone: YES];
fail_if(string == nil, "-[NSString initWithBytesNoCopy: length: encoding: freeWhenDone: YES] returned nil with a UTF-16-BE encoded string.");
[string release];
/* Free, UTF-16-LE */
- bytes = malloc(sizeof(NSUnicodeStringEncodingLE_data));
- memcpy(bytes, NSUnicodeStringEncodingLE_data, sizeof(NSUnicodeStringEncodingLE_data));
- string = [[NSString alloc] initWithBytesNoCopy: bytes length: sizeof(NSUnicodeStringEncodingLE_data) encoding: NSUnicodeStringEncoding freeWhenDone: YES];
+ bytes = malloc(sizeof(NSUnicodeStringEncoding_data));
+ memcpy(bytes, NSUnicodeStringEncoding_data, sizeof(NSUnicodeStringEncoding_data));
+ string = [[NSString alloc] initWithBytesNoCopy: bytes length: sizeof(NSUnicodeStringEncoding_data) encoding: NSUnicodeStringEncoding freeWhenDone: YES];
fail_if(string == nil, "-[NSString initWithBytesNoCopy: length: encoding: freeWhenDone: YES] returned nil with a UTF-16-LE encoded string.");
[string release];
}
@@ -238,9 +245,5 @@
tcase_add_test(tc_init, test_NSMacOSRomanStringEncoding);
tcase_add_test(tc_init, test_initWithBytesNoCopyLengthEncodingFreeWhenDone);
-#ifdef TODO_NSData
-#error Add a test on the conversion result using dataUsingEncoding:allowLossyConversion: once NSData has been implemented!
-#endif
-
return (s);
}
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|