|
From: Rich G. <ric...@ap...> - 2023-07-27 21:02:56
|
Frank— Finally had a chance to read through this. Looks good! I added some comments to the version of this that’s in https://docs.google.com/document/d/1o6FDnf1fVZTETOPlQvwR_XrE_O1F--qIjGyhkCCVnI8/edit. Most of my comments just echo things we talked about this morning, but there were a couple other small comments on top of those. The main one was that I wasn’t quite sure what ulocale_isBogus() did. —Rich > On Jul 20, 2023, at 10:23 PM, Frank Tang (譚永鋒) <ft...@go...> wrote: > > Sorry, I missed something. Here is the revised one > > diff --git a/icu4c/source/common/unicode/uloc.h b/icu4c/source/common/unicode/uloc.h > index 21179c1b628..0d2a09c8885 100644 > --- a/icu4c/source/common/unicode/uloc.h > +++ b/icu4c/source/common/unicode/uloc.h > @@ -1390,4 +1390,299 @@ uloc_toLegacyKey(const char* keyword); > U_CAPI const char* U_EXPORT2 > uloc_toLegacyType(const char* keyword, const char* value); > > +#ifndef U_HIDE_DRAFT_API > +/** > + * Opaque C service object type for the locale API > + * @draft ICU 74 > + */ > +struct ULocale; > + > +/** > + * C typedef for struct ULocale. > + * @draft ICU 74 > + */ > +typedef struct ULocale ULocale; > + > +/** > + * Constructs an ULocale from the locale ID. > + * The created ULocale should be destoried by calling > + * ulocale_close(); > + * @param locale the locale, a const char * pointer (need not be terminated when > + * the length is non-negative) > + * @param length the length of the locale; if negative, then the locale need to be > + * null terminated. > + * @return the locale. > + * > + * @draft ICU 74 > + */ > +U_CAPI ULocale* U_EXPORT2 > +ulocale_openLocaleID(const char* localeID, int32_t length); > + > +/** > + * Constructs an ULocale from the provided IETF BCP 47 language tag. 
> + * The created ULocale should be destroyed by calling > + * ulocale_close(); > + * @param tag the language tag, defined as IETF BCP 47 language tag, const > + * char* pointer (need not be terminated when the length is non-negative) > + * @param length the length of the tag; if negative, then the tag needs to be > + * null terminated. > + * @param err the error code > + * @return the locale. > + * > + * @draft ICU 74 > + */ > +U_CAPI ULocale* U_EXPORT2 > +ulocale_openForLanguageTag(const char* tag, int32_t length, UErrorCode* err); > + > +/** > + * Closes the locale and destroys its internal state. > + * > + * @param locale the locale > + * @draft ICU 74 > + */ > +U_CAPI void U_EXPORT2 > +ulocale_close(ULocale* locale); > + > +/** > + * Constructs another ULocale by canonicalizing the provided locale. > + * The created ULocale should be destroyed by calling > + * ulocale_close(); > + * > + * @param locale the locale > + * @param err the error code > + * @return the canonicalized locale. > + * @draft ICU 74 > + */ > +U_CAPI ULocale* U_EXPORT2 > +ulocale_canonicalize(const ULocale* locale, UErrorCode *err); > + > +/** > + * Constructs another ULocale by maximizing the subtags of the provided > + * Locale, per the algorithm described in the following CLDR technical report: > + * http://www.unicode.org/reports/tr35/#Likely_Subtags > + * The created ULocale should be destroyed by calling > + * ulocale_close(); > + * > + * @param locale the locale > + * @param err the error code > + * @return the maximized locale. 
> + * @draft ICU 74 > + */ > +U_CAPI ULocale* U_EXPORT2 > +ulocale_addLikelySubtags(const ULocale* locale, UErrorCode *err); > + > +/** > + * Constructs another ULocale by minimize from the the subtags of the provided > + * Locale, per the algorithm described in the following CLDR technical report: > + * http://www.unicode.org/reports/tr35/#Likely_Subtags > + * The created ULocale should be destoried by calling > + * ulocale_close(); > + * > + * @param locale the locale > + * @param err the error code > + * @return the minimized locale. > + * @draft ICU 74 > + */ > +U_CAPI ULocale* U_EXPORT2 > +ulocale_minimizeSubtags(const ULocale* locale, UErrorCode *err); > + > +/** > + * Returns the locale's ISO-639 language code. > + * > + * @param locale the locale > + * @return the language code of the locale. > + * @draft ICU 74 > + */ > +U_CAPI const char* U_EXPORT2 > +ulocale_getLanguage(const ULocale* locale); > + > +/** > + * Returns the locale's ISO-15924 abbreviation script code. > + * > + * @param locale the locale > + * @return A pointer to the script. > + * @draft ICU 74 > + */ > +U_CAPI const char* U_EXPORT2 > +ulocale_getScript(const ULocale* locale); > + > +/** > + * Returns the locale's ISO-3166 region code. > + * > + * @param locale the locale > + * @return A pointer to the region. > + * @draft ICU 74 > + */ > +U_CAPI const char* U_EXPORT2 > +ulocale_getRegion(const ULocale* locale); > + > +/** > + * Returns the locale's variant code. > + * > + * @param locale the locale > + * @return A pointer to the variant. > + * @draft ICU 74 > + */ > +U_CAPI const char* U_EXPORT2 > +ulocale_getVariant(const ULocale* locale); > + > +/** > + * Returns the programmatic name of the entire locale, with the language, > + * country and variant separated by underbars. If a field is missing, up > + * to two leading underbars will occur. 
Example: "en", "de_DE", "en_US_WIN", > + * "de__POSIX", "fr__MAC", "__MAC", "_MT", "_FR_EURO" > + * > + * @param locale the locale > + * @return A pointer to "name". > + * @draft ICU 74 > + */ > +U_CAPI const char* U_EXPORT2 > +ulocale_getLocaleID(const ULocale* locale); > + > +/** > + * Returns the programmatic name of the entire locale as ulocale_getLocaleID() > + * would return, but without keywords. > + * > + * @param locale the locale > + * @return A pointer to "base name". > + * @draft ICU 74 > + */ > +U_CAPI const char* U_EXPORT2 > +ulocale_getBaseName(const ULocale* locale); > + > +/** > + * Returns the locale's three-letter language code, as specified > + * in ISO draft standard ISO-639-2. > + * > + * @param locale the locale > + * @return the locale's three-letter language code of the locale, > + * or an empty string.. > + * @draft ICU 74 > + */ > +U_CAPI const char* U_EXPORT2 > +ulocale_getISO3Language(const ULocale* locale); > + > +/** > + * Returns the locale's three-letter ISO-3166 region code. > + * > + * @param locale the locale > + * @return the locale's three-letter ISO-3166 region code of the locale, > + * or an empty string.. > + * @draft ICU 74 > + */ > +U_CAPI const char* U_EXPORT2 > +ulocale_getISO3Region(const ULocale* locale); > + > +/** > + * Returns whether this locale's script is written right-to-left. > + * If there is no script subtag, then the likely script is used, see uloc_addLikelySubtags(). > + * If no likely script is known, then false is returned. > + * > + * A script is right-to-left according to the CLDR script metadata > + * which corresponds to whether the script's letters have Bidi_Class=R or AL. > + * > + * Returns true for "ar" and "en-Hebr", false for "zh" and "fa-Cyrl". > + * > + * @param locale the locale > + * @return true if the locale's script is written right-to-left > + * @draft ICU 74 > + */ > +U_CAPI bool U_EXPORT2 > +ulocale_isRightToLeft(const ULocale* locale); > + > +/** > + * Gets the bogus state. 
Locale object can be bogus if it doesn't exist > + * > + * @param locale the locale > + * @return false if it is a real locale, true if it is a bogus locale > + * @draft ICU 74 > + */ > +U_CAPI bool U_EXPORT2 > +ulocale_isBogus(const ULocale* locale); > + > +/** > + * Gets the list of keywords for the specified locale. > + * > + * @param locale the locale > + * @param err the error code > + * @return pointer to UEnumeration, or nullptr if there are no keywords. > + * Client must call uenum_close() to dispose the returned value. > + * @draft ICU 74 > + */ > +U_CAPI UEnumeration* U_EXPORT2 > +ulocale_getKeywords(const ULocale* locale, UErrorCode *err); > + > +/** > + * Gets the list of unicode keywords for the specified locale. > + * > + * @param locale the locale > + * @param err the error code > + * @return pointer to UEnumeration, or nullptr if there are no keywords. > + * Client must call uenum_close() to dispose the returned value. > + * @draft ICU 74 > + */ > +U_CAPI UEnumeration* U_EXPORT2 > +ulocale_getUnicodeKeywords(const ULocale* locale, UErrorCode *err); > + > +/** > + * Gets the value for a keyword. > + * > + * This uses legacy keyword=value pairs, like "collation=phonebook". > + * > + * @param locale the locale > + * @param keyword the keyword, a const char * pointer (need not be > + * terminated when the length is non-negative) > + * @param keywordLength the length of the keyword; if negative, then the > + * keyword need to be null terminated. > + * @param valueBuffer The buffer to receive the value. > + * @param valueBufferCapacity The capacity of receiving valueBuffer. > + * @param err the error code > + * @draft ICU 74 > + */ > +U_CAPI int32_t U_EXPORT2 > +ulocale_getKeywordValue( > + const ULocale* locale, const char* keyword, int32_t keywordLength, > + char* valueBuffer, int32_t valueBufferCapacity, UErrorCode *err); > + > +/** > + * Gets the Unicode value for a Unicode keyword. > + * > + * This uses Unicode key-value pairs, like "co-phonebk". 
> + * > + * @param locale the locale > + * @param keyword the Unicode keyword, a const char * pointer (need not be > + * terminated when the length is non-negative) > + * @param keywordLength the length of the Unicode keyword; if negative, > + * then the keyword needs to be null terminated. > + * @param valueBuffer The buffer to receive the Unicode value. > + * @param valueBufferCapacity The capacity of receiving valueBuffer. > + * @param err the error code > + * @draft ICU 74 > + */ > +U_CAPI int32_t U_EXPORT2 > +ulocale_getUnicodeKeywordValue( > + const ULocale* locale, const char* keyword, int32_t keywordLength, > + char* valueBuffer, int32_t bufferCapacity, UErrorCode *err); > + > +#if U_SHOW_CPLUSPLUS_API > + > +U_NAMESPACE_BEGIN > + > +/** > + * \class LocalULocalePointer > + * "Smart pointer" class, closes a ULocale via ulocale_close(). > + * For most methods see the LocalPointerBase base class. > + * > + * @see LocalPointerBase > + * @see LocalPointer > + * @draft ICU 74 > + */ > +U_DEFINE_LOCAL_OPEN_POINTER(LocalULocalePointer, ULocale, ulocale_close); > + > +U_NAMESPACE_END > + > +#endif /* U_SHOW_CPLUSPLUS_API */ > + > +#endif /* U_HIDE_DRAFT_API */ > + > #endif /*_ULOC*/ > > On Thu, 20 Jul 2023 at 18:22, Frank Tang (譚永鋒) <ft...@go... <mailto:ft...@go...>> wrote: >> Dear ICU team & users, >> >> >> I would like to propose the following for: ICU 74 >> >> Please provide feedback by: Next Wednesday, July 26, or any time sufficiently in advance of the feature freeze >> >> Designated API review: Rich Gillam >> >> Issue: https://unicode-org.atlassian.net/browse/ICU-22435 >> A draft PR and implementation (not including tests yet) can be found at >> https://github.com/unicode-org/icu/pull/2531 >> >> This is a follow-up to the C API for ULocaleBuilder >> >> Issues to be discussed >> 1. Do we need methods for canonicalize/addLikelySubtags/minimizeSubtags? >> 2. 
If the answer to #1 is YES, should we make them producers that create and return a different ULocale, keeping ULocale an immutable object (as proposed below)? >> 3. Do we need methods unrelated to parsing: >> ulocale_isRightToLeft >> ulocale_getISO3Country >> ulocale_getISO3Language >> >> >> >> diff --git a/icu4c/source/common/unicode/uloc.h b/icu4c/source/common/unicode/uloc.h >> index 21179c1b628..4b0b419ec4b 100644 >> --- a/icu4c/source/common/unicode/uloc.h >> +++ b/icu4c/source/common/unicode/uloc.h >> @@ -1390,4 +1390,279 @@ uloc_toLegacyKey(const char* keyword); >> U_CAPI const char* U_EXPORT2 >> uloc_toLegacyType(const char* keyword, const char* value); >> >> +#ifndef U_HIDE_DRAFT_API >> +/** >> + * Opaque C service object type for the locale API >> + * @draft ICU 74 >> + */ >> +struct ULocale; >> + >> +/** >> + * C typedef for struct ULocale. >> + * @draft ICU 74 >> + */ >> +typedef struct ULocale ULocale; >> + >> +/** >> + * Constructs a ULocale from the locale ID. >> + * The created ULocale should be destroyed by calling >> + * ulocale_close(); >> + * @param localeID the locale, a const char * pointer (need not be terminated when >> + * the length is non-negative) >> + * @param length the length of the locale; if negative, then the locale needs to be >> + * null terminated. >> + * @return the locale. >> + * >> + * @draft ICU 74 >> + */ >> +U_CAPI ULocale* U_EXPORT2 >> +ulocale_openLocaleID(const char* localeID, int32_t length); >> + >> +/** >> + * Constructs a ULocale from the provided IETF BCP 47 language tag. >> + * The created ULocale should be destroyed by calling >> + * ulocale_close(); >> + * @param tag the language tag, defined as IETF BCP 47 language tag, const >> + * char* pointer (need not be terminated when the length is non-negative) >> + * @param length the length of the tag; if negative, then the tag needs to be >> + * null terminated. >> + * @return the locale. 
>> + * >> + * @draft ICU 74 >> + */ >> +U_CAPI ULocale* U_EXPORT2 >> +ulocale_openForLanguageTag(const char* tag, int32_t length); >> + >> +/** >> + * Close the locale and destroy it's internal states. >> + * >> + * @param locale the locale >> + * @draft ICU 74 >> + */ >> +U_CAPI void U_EXPORT2 >> +ulocale_close(ULocale* locale); >> + >> +/** >> + * Constructs another ULocale by canonicalize the provided locale. >> + * The created ULocale should be destoried by calling >> + * ulocale_close(); >> + * >> + * @param locale the locale >> + * @param err the error code >> + * @return the canonicalized locale. >> + * @draft ICU 74 >> + */ >> +U_CAPI ULocale* U_EXPORT2 >> +ulocale_canonicalize(const ULocale* locale, UErrorCode *err); >> + >> +/** >> + * Constructs another ULocale by maximize from the the subtags of the provided >> + * Locale, per the algorithm described in the following CLDR technical report: >> + * http://www.unicode.org/reports/tr35/#Likely_Subtags >> + * The created ULocale should be destoried by calling >> + * ulocale_close(); >> + * >> + * @param locale the locale >> + * @param err the error code >> + * @return the maximized locale. >> + * @draft ICU 74 >> + */ >> +U_CAPI ULocale* U_EXPORT2 >> +ulocale_addLikelySubtags(const ULocale* locale, UErrorCode *err); >> + >> +/** >> + * Constructs another ULocale by minimize from the the subtags of the provided >> + * Locale, per the algorithm described in the following CLDR technical report: >> + * http://www.unicode.org/reports/tr35/#Likely_Subtags >> + * The created ULocale should be destoried by calling >> + * ulocale_close(); >> + * >> + * @param locale the locale >> + * @param err the error code >> + * @return the minimized locale. >> + * @draft ICU 74 >> + */ >> +U_CAPI ULocale* U_EXPORT2 >> +ulocale_minimizeSubtags(const ULocale* locale, UErrorCode *err); >> + >> +/** >> + * Returns the locale's ISO-639 language code. 
>> + * >> + * @param locale the locale >> + * @return the language code of the locale. >> + * @draft ICU 74 >> + */ >> +U_CAPI const char* U_EXPORT2 >> +ulocale_getLanguage(const ULocale* locale); >> + >> +/** >> + * Returns the locale's ISO-15924 abbreviation script code. >> + * >> + * @param locale the locale >> + * @return A pointer to the script. >> + * @draft ICU 74 >> + */ >> +U_CAPI const char* U_EXPORT2 >> +ulocale_getScript(const ULocale* locale); >> + >> +/** >> + * Returns the locale's ISO-3166 region code. >> + * >> + * @param locale the locale >> + * @return A pointer to the region. >> + * @draft ICU 74 >> + */ >> +U_CAPI const char* U_EXPORT2 >> +ulocale_getRegion(const ULocale* locale); >> + >> +/** >> + * Returns the locale's variant code. >> + * >> + * @param locale the locale >> + * @return A pointer to the variant. >> + * @draft ICU 74 >> + */ >> +U_CAPI const char* U_EXPORT2 >> +ulocale_getVariant(const ULocale* locale); >> + >> +/** >> + * Returns the programmatic name of the entire locale, with the language, >> + * country and variant separated by underbars. If a field is missing, up >> + * to two leading underbars will occur. Example: "en", "de_DE", "en_US_WIN", >> + * "de__POSIX", "fr__MAC", "__MAC", "_MT", "_FR_EURO" >> + * >> + * @param locale the locale >> + * @return A pointer to "name". >> + * @draft ICU 74 >> + */ >> +U_CAPI const char* U_EXPORT2 >> +ulocale_getLocaleID(const ULocale* locale); >> + >> +/** >> + * Returns the programmatic name of the entire locale as ulocale_getLocaleID() >> + * would return, but without keywords. >> + * >> + * @param locale the locale >> + * @return A pointer to "base name". >> + * @draft ICU 74 >> + */ >> +U_CAPI const char* U_EXPORT2 >> +ulocale_getBaseName(const ULocale* locale); >> + >> +/** >> + * Returns the locale's three-letter language code, as specified >> + * in ISO draft standard ISO-639-2. 
>> + * >> + * @param locale the locale >> + * @return the locale's three-letter language code of the locale, >> + * or an empty string.. >> + * @draft ICU 74 >> + */ >> +U_CAPI const char* U_EXPORT2 >> +ulocale_getISO3Language(const ULocale* locale); >> + >> +/** >> + * Returns the locale's three-letter ISO-3166 country code. >> + * >> + * @param locale the locale >> + * @return the locale's three-letter ISO-3166 country code of the locale, >> + * or an empty string.. >> + * @draft ICU 74 >> + */ >> +U_CAPI const char* U_EXPORT2 >> +ulocale_getISO3Country(const ULocale* locale); >> + >> +/** >> + * Returns whether this locale's script is written right-to-left. >> + * If there is no script subtag, then the likely script is used, see uloc_addLikelySubtags(). >> + * If no likely script is known, then false is returned. >> + * >> + * A script is right-to-left according to the CLDR script metadata >> + * which corresponds to whether the script's letters have Bidi_Class=R or AL. >> + * >> + * Returns true for "ar" and "en-Hebr", false for "zh" and "fa-Cyrl". >> + * >> + * @param locale the locale >> + * @return true if the locale's script is written right-to-left >> + * @draft ICU 74 >> + */ >> +U_CAPI bool U_EXPORT2 >> +ulocale_isRightToLeft(const ULocale* locale); >> + >> +/** >> + * Gets the bogus state. Locale object can be bogus if it doesn't exist >> + * >> + * @param locale the locale >> + * @return false if it is a real locale, true if it is a bogus locale >> + * @draft ICU 74 >> + */ >> +U_CAPI bool U_EXPORT2 >> +ulocale_isBogus(const ULocale* locale); >> + >> +/** >> + * Gets the list of keywords for the specified locale. >> + * >> + * @param locale the locale >> + * @param err the error code >> + * @return pointer to UEnumeration, or nullptr if there are no keywords. >> + * Client must call uenum_close() to dispose the returned value. 
>> + * @draft ICU 74 >> + */ >> +U_CAPI UEnumeration* U_EXPORT2 >> +ulocale_getKeywords(const ULocale* locale, UErrorCode *err); >> + >> +/** >> + * Gets the list of unicode keywords for the specified locale. >> + * >> + * @param locale the locale >> + * @param err the error code >> + * @return pointer to UEnumeration, or nullptr if there are no keywords. >> + * Client must call uenum_close() to dispose the returned value. >> + * @draft ICU 74 >> + */ >> +U_CAPI UEnumeration* U_EXPORT2 >> +ulocale_getUnicodeKeywords(const ULocale* locale, UErrorCode *err); >> + >> +/** >> + * Gets the value for a keyword. >> + * >> + * This uses legacy keyword=value pairs, like "collation=phonebook". >> + * >> + * @param locale the locale >> + * @param keyword the keyword, a const char * pointer (need not be >> + * terminated when the length is non-negative) >> + * @param keywordLength the length of the keyword; if negative, then the >> + * keyword need to be null terminated. >> + * @param valueBuffer The buffer to receive the value. >> + * @param valueBufferCapacity The capacity of receiving valueBuffer. >> + * @param err the error code >> + * @draft ICU 74 >> + */ >> +U_CAPI int32_t U_EXPORT2 >> +ulocale_getKeywordValue( >> + const ULocale* locale, const char* keyword, int32_t keywordLength, >> + char* valueBuffer, int32_t valueBufferCapacity, UErrorCode *err); >> + >> +/** >> + * Gets the Unicode value for a Unicode keyword. >> + * >> + * This uses Unicode key-value pairs, like "co-phonebk". >> + * >> + * @param locale the locale >> + * @param keyword the Unicode keyword, a const char * pointer (need not be >> + * terminated when the length is non-negative) >> + * @param keywordLength the length of the Unicode keyword; if negative, >> + * then the keyword need to be null terminated. >> + * @param valueBuffer The buffer to receive the Unicode value. >> + * @param valueBufferCapacity The capacity of receiving valueBuffer. 
>> + * @param err the error code >> + * @draft ICU 74 >> + */ >> +U_CAPI int32_t U_EXPORT2 >> +ulocale_getUnicodeKeywordValue( >> + const ULocale* locale, const char* keyword, int32_t keywordLength, >> + char* valueBuffer, int32_t bufferCapacity, UErrorCode *err); >> + >> +#endif /* U_HIDE_DRAFT_API */ >> + >> #endif /*_ULOC*/ >> -- >> Frank Yung-Fong Tang >> 譚永鋒 / 🌭🍊 >> Sr. Software Engineer > > > -- > Frank Yung-Fong Tang > 譚永鋒 / 🌭🍊 > Sr. Software Engineer |