From: <fg...@us...> - 2011-01-01 20:20:04
|
Revision: 3198 http://openutils.svn.sourceforge.net/openutils/?rev=3198&view=rev Author: fgiust Date: 2011-01-01 20:19:56 +0000 (Sat, 01 Jan 2011) Log Message: ----------- CRIT-30 Adds an utility package with commonly used lucene analyzers Modified Paths: -------------- trunk/openutils-mgnlcriteria/src/test/resources/crit-repository/indexing_configuration.xml trunk/openutils-mgnlcriteria/src/test/resources/crit-repository/jackrabbit-acl-search-index-test-configuration.xml trunk/openutils-mgnlcriteria/src/test/resources/crit-repository/jackrabbit-test-configuration.xml Added Paths: ----------- trunk/openutils-mgnlcriteria/src/main/java/net/sourceforge/openutils/mgnlcriteria/utils/ trunk/openutils-mgnlcriteria/src/main/java/net/sourceforge/openutils/mgnlcriteria/utils/ASCIIFoldingAnalyzer.java trunk/openutils-mgnlcriteria/src/main/java/net/sourceforge/openutils/mgnlcriteria/utils/ASCIIFoldingFilter.java trunk/openutils-mgnlcriteria/src/main/java/net/sourceforge/openutils/mgnlcriteria/utils/Latin1Analyzer.java Removed Paths: ------------- trunk/openutils-mgnlcriteria/src/test/java/net/sourceforge/openutils/mgnlcriteria/tests/ItalianSnowballAnalyzer.java Added: trunk/openutils-mgnlcriteria/src/main/java/net/sourceforge/openutils/mgnlcriteria/utils/ASCIIFoldingAnalyzer.java =================================================================== --- trunk/openutils-mgnlcriteria/src/main/java/net/sourceforge/openutils/mgnlcriteria/utils/ASCIIFoldingAnalyzer.java (rev 0) +++ trunk/openutils-mgnlcriteria/src/main/java/net/sourceforge/openutils/mgnlcriteria/utils/ASCIIFoldingAnalyzer.java 2011-01-01 20:19:56 UTC (rev 3198) @@ -0,0 +1,78 @@ +/** + * + * Magnolia Criteria API (http://www.openmindlab.com/lab/products/mgnlcriteria.html) + * Copyright(C) 2009-2010, Openmind S.r.l. 
http://www.openmindonline.it + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +package net.sourceforge.openutils.mgnlcriteria.utils; + +import java.io.IOException; +import java.io.Reader; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.LowerCaseFilter; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.standard.StandardFilter; +import org.apache.lucene.analysis.standard.StandardTokenizer; + + +/** + * Lucene analyzer whose chain is {@link StandardTokenizer}, {@link StandardFilter}, {@link LowerCaseFilter} and + * {@link ASCIIFoldingFilter}, applied in that order. + * @author molaschi + * @version $Id$ + */ +public class ASCIIFoldingAnalyzer extends Analyzer +{ + + /** + * Builds a brand-new tokenizer and filter chain over the given reader on every call. + * @param fieldName not consulted by this implementation + * @param reader source of the text to tokenize + * @return the last filter of the chain (the {@link ASCIIFoldingFilter}) + */ + @Override + public TokenStream tokenStream(String fieldName, Reader reader) + { + StandardTokenizer tokenStream = new StandardTokenizer(reader); + + TokenStream result = new StandardFilter(tokenStream); + result = new LowerCaseFilter(result); + result = new ASCIIFoldingFilter(result); + return result; + } + + /** + * Returns the chain previously stored through setPreviousTokenStream, after re-pointing its tokenizer at the + * new reader. When nothing is stored yet it builds the same chain as {@link #tokenStream(String, Reader)}, + * stores it and returns it. + * NOTE(review): the scope of that cache is decided by getPreviousTokenStream, which is not visible here; + * presumably it is per thread, as in Lucene's Analyzer - confirm. + * @param fieldName not consulted by this implementation + * @param reader source of the text to tokenize + * @return the last filter of the stored chain + * @throws IOException presumably raised while resetting the tokenizer - confirm against the Lucene version in use + */ + @Override + public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException + { + SavedStreams streams = (SavedStreams) getPreviousTokenStream(); + if (streams == null) + { + streams = new SavedStreams(); + setPreviousTokenStream(streams); + streams.tokenStream = new StandardTokenizer(reader); + streams.filteredTokenStream = new StandardFilter(streams.tokenStream); + streams.filteredTokenStream = new LowerCaseFilter(streams.filteredTokenStream); + 
streams.filteredTokenStream = new ASCIIFoldingFilter(streams.filteredTokenStream); + } + else + { + /* Only the tokenizer is re-pointed at the new reader; the filter objects wrapping it are reused as they are. */ + streams.tokenStream.reset(reader); + } + + return streams.filteredTokenStream; + } + + /** + * Holder for the stored chain: the tokenizer at its head and the filter at its end. + */ + private static final class SavedStreams + { + + /** Head of the chain; the only object that is reset when the chain is reused. */ + StandardTokenizer tokenStream; + + /** End of the chain; this is the object reusableTokenStream hands back. */ + TokenStream filteredTokenStream; + } + +} Property changes on: trunk/openutils-mgnlcriteria/src/main/java/net/sourceforge/openutils/mgnlcriteria/utils/ASCIIFoldingAnalyzer.java ___________________________________________________________________ Added: svn:mime-type + text/plain Added: svn:keywords + Author Date Id Revision Added: svn:eol-style + native Added: trunk/openutils-mgnlcriteria/src/main/java/net/sourceforge/openutils/mgnlcriteria/utils/ASCIIFoldingFilter.java =================================================================== --- trunk/openutils-mgnlcriteria/src/main/java/net/sourceforge/openutils/mgnlcriteria/utils/ASCIIFoldingFilter.java (rev 0) +++ trunk/openutils-mgnlcriteria/src/main/java/net/sourceforge/openutils/mgnlcriteria/utils/ASCIIFoldingFilter.java 2011-01-01 20:19:56 UTC (rev 3198) @@ -0,0 +1,2017 @@ +/** + * + * Magnolia Criteria API (http://www.openmindlab.com/lab/products/mgnlcriteria.html) + * Copyright(C) 2009-2010, Openmind S.r.l. http://www.openmindonline.it + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +package net.sourceforge.openutils.mgnlcriteria.utils; + +import org.apache.lucene.analysis.Token; +import org.apache.lucene.analysis.TokenFilter; +import org.apache.lucene.analysis.TokenStream; + + +/** + * A filter backported from Lucene 2.9.1 which converts any character variation into its ASCII equivalent. + * Terms made only of characters below U+0080 pass through untouched. + * @author fgiust + * @version $Id$ + */ + +public class ASCIIFoldingFilter extends TokenFilter +{ + + /** + * Wraps the given stream. + * @param input the token stream whose terms are to be folded + */ + public ASCIIFoldingFilter(TokenStream input) + { + super(input); + } + + /** Scratch buffer that removeAccents writes the folded term into; removeAccents replaces it with a larger one when needed. */ + private char[] output = new char[256]; + + /** Number of chars written into output by the most recent removeAccents call. */ + private int outputPos; + + /** + * Pulls the next token from the wrapped stream and, when its term holds at least one char at or above U+0080, + * replaces the term with the folded text produced by removeAccents. + * @param reusableToken token instance handed on to the wrapped stream; must not be null (checked by assert only) + * @return the token obtained from the wrapped stream, its term possibly rewritten, or null when the wrapped + * stream returns null + * @throws java.io.IOException propagated from the wrapped stream + */ + @Override + public final Token next(final Token reusableToken) throws java.io.IOException + { + assert reusableToken != null; + Token nextToken = input.next(reusableToken); + if (nextToken != null) + { + final char[] buffer = nextToken.termBuffer(); + final int length = nextToken.termLength(); + // If no characters actually require rewriting then we + // just return token as-is: + for (int i = 0; i < length; i++) + { + final char c = buffer[i]; + if (c >= '\u0080') + { + removeAccents(buffer, length); + nextToken.setTermBuffer(output, 0, outputPos); + break; + } + } + return nextToken; + } + else + return null; + } + + /** + * To replace accented characters in a String by unaccented equivalents. 
+ */ + public final void removeAccents(char[] input, int length) + { + + // Worst-case length required: + final int maxSizeNeeded = 2 * length; + + int size = output.length; + while (size < maxSizeNeeded) + size *= 2; + + if (size != output.length) + output = new char[size]; + + outputPos = 0; + + for (int pos = 0; pos < length; ++pos) + { + final char c = input[pos]; + + // Quick test: if it's not in range then just keep current character + if (c < '\u0080') + { + output[outputPos++] = c; + } + else + { + switch (c) + { + case '\u00C0' : // À [LATIN CAPITAL LETTER A WITH GRAVE] + case '\u00C1' : // Á [LATIN CAPITAL LETTER A WITH ACUTE] + case '\u00C2' : // Â [LATIN CAPITAL LETTER A WITH CIRCUMFLEX] + case '\u00C3' : // Ã [LATIN CAPITAL LETTER A WITH TILDE] + case '\u00C4' : // Ä [LATIN CAPITAL LETTER A WITH DIAERESIS] + case '\u00C5' : // Å [LATIN CAPITAL LETTER A WITH RING ABOVE] + case '\u0100' : // Ā [LATIN CAPITAL LETTER A WITH MACRON] + case '\u0102' : // Ă [LATIN CAPITAL LETTER A WITH BREVE] + case '\u0104' : // Ą [LATIN CAPITAL LETTER A WITH OGONEK] + case '\u018F' : // Ə http://en.wikipedia.org/wiki/Schwa [LATIN CAPITAL LETTER SCHWA] + case '\u01CD' : // Ǎ [LATIN CAPITAL LETTER A WITH CARON] + case '\u01DE' : // Ǟ [LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON] + case '\u01E0' : // Ǡ [LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON] + case '\u01FA' : // Ǻ [LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE] + case '\u0200' : // Ȁ [LATIN CAPITAL LETTER A WITH DOUBLE GRAVE] + case '\u0202' : // Ȃ [LATIN CAPITAL LETTER A WITH INVERTED BREVE] + case '\u0226' : // Ȧ [LATIN CAPITAL LETTER A WITH DOT ABOVE] + case '\u023A' : // Ⱥ [LATIN CAPITAL LETTER A WITH STROKE] + case '\u1D00' : // ᴀ [LATIN LETTER SMALL CAPITAL A] + case '\u1E00' : // Ḁ [LATIN CAPITAL LETTER A WITH RING BELOW] + case '\u1EA0' : // Ạ [LATIN CAPITAL LETTER A WITH DOT BELOW] + case '\u1EA2' : // Ả [LATIN CAPITAL LETTER A WITH HOOK ABOVE] + case '\u1EA4' : // Ấ [LATIN CAPITAL LETTER A WITH 
CIRCUMFLEX AND ACUTE] + case '\u1EA6' : // Ầ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE] + case '\u1EA8' : // Ẩ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE] + case '\u1EAA' : // Ẫ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE] + case '\u1EAC' : // Ậ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW] + case '\u1EAE' : // Ắ [LATIN CAPITAL LETTER A WITH BREVE AND ACUTE] + case '\u1EB0' : // Ằ [LATIN CAPITAL LETTER A WITH BREVE AND GRAVE] + case '\u1EB2' : // Ẳ [LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE] + case '\u1EB4' : // Ẵ [LATIN CAPITAL LETTER A WITH BREVE AND TILDE] + case '\u1EB6' : // Ặ [LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW] + case '\u24B6' : // Ⓐ [CIRCLED LATIN CAPITAL LETTER A] + case '\uFF21' : // A [FULLWIDTH LATIN CAPITAL LETTER A] + output[outputPos++] = 'A'; + break; + case '\u00E0' : // à [LATIN SMALL LETTER A WITH GRAVE] + case '\u00E1' : // á [LATIN SMALL LETTER A WITH ACUTE] + case '\u00E2' : // â [LATIN SMALL LETTER A WITH CIRCUMFLEX] + case '\u00E3' : // ã [LATIN SMALL LETTER A WITH TILDE] + case '\u00E4' : // ä [LATIN SMALL LETTER A WITH DIAERESIS] + case '\u00E5' : // å [LATIN SMALL LETTER A WITH RING ABOVE] + case '\u0101' : // ā [LATIN SMALL LETTER A WITH MACRON] + case '\u0103' : // ă [LATIN SMALL LETTER A WITH BREVE] + case '\u0105' : // ą [LATIN SMALL LETTER A WITH OGONEK] + case '\u01CE' : // ǎ [LATIN SMALL LETTER A WITH CARON] + case '\u01DF' : // ǟ [LATIN SMALL LETTER A WITH DIAERESIS AND MACRON] + case '\u01E1' : // ǡ [LATIN SMALL LETTER A WITH DOT ABOVE AND MACRON] + case '\u01FB' : // ǻ [LATIN SMALL LETTER A WITH RING ABOVE AND ACUTE] + case '\u0201' : // ȁ [LATIN SMALL LETTER A WITH DOUBLE GRAVE] + case '\u0203' : // ȃ [LATIN SMALL LETTER A WITH INVERTED BREVE] + case '\u0227' : // ȧ [LATIN SMALL LETTER A WITH DOT ABOVE] + case '\u0250' : // ɐ [LATIN SMALL LETTER TURNED A] + case '\u0259' : // ə [LATIN SMALL LETTER SCHWA] + case '\u025A' : // ɚ [LATIN SMALL LETTER SCHWA WITH HOOK] + case 
'\u1D8F' : // ᶏ [LATIN SMALL LETTER A WITH RETROFLEX HOOK] + case '\u1D95' : // ᶕ [LATIN SMALL LETTER SCHWA WITH RETROFLEX HOOK] + case '\u1E01' : // ạ [LATIN SMALL LETTER A WITH RING BELOW] + case '\u1E9A' : // ả [LATIN SMALL LETTER A WITH RIGHT HALF RING] + case '\u1EA1' : // ạ [LATIN SMALL LETTER A WITH DOT BELOW] + case '\u1EA3' : // ả [LATIN SMALL LETTER A WITH HOOK ABOVE] + case '\u1EA5' : // ấ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND ACUTE] + case '\u1EA7' : // ầ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND GRAVE] + case '\u1EA9' : // ẩ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE] + case '\u1EAB' : // ẫ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND TILDE] + case '\u1EAD' : // ậ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND DOT BELOW] + case '\u1EAF' : // ắ [LATIN SMALL LETTER A WITH BREVE AND ACUTE] + case '\u1EB1' : // ằ [LATIN SMALL LETTER A WITH BREVE AND GRAVE] + case '\u1EB3' : // ẳ [LATIN SMALL LETTER A WITH BREVE AND HOOK ABOVE] + case '\u1EB5' : // ẵ [LATIN SMALL LETTER A WITH BREVE AND TILDE] + case '\u1EB7' : // ặ [LATIN SMALL LETTER A WITH BREVE AND DOT BELOW] + case '\u2090' : // ₐ [LATIN SUBSCRIPT SMALL LETTER A] + case '\u2094' : // ₔ [LATIN SUBSCRIPT SMALL LETTER SCHWA] + case '\u24D0' : // ⓐ [CIRCLED LATIN SMALL LETTER A] + case '\u2C65' : // ⱥ [LATIN SMALL LETTER A WITH STROKE] + case '\u2C6F' : // Ɐ [LATIN CAPITAL LETTER TURNED A] + case '\uFF41' : // a [FULLWIDTH LATIN SMALL LETTER A] + output[outputPos++] = 'a'; + break; + case '\uA732' : // Ꜳ [LATIN CAPITAL LETTER AA] + output[outputPos++] = 'A'; + output[outputPos++] = 'A'; + break; + case '\u00C6' : // Æ [LATIN CAPITAL LETTER AE] + case '\u01E2' : // Ǣ [LATIN CAPITAL LETTER AE WITH MACRON] + case '\u01FC' : // Ǽ [LATIN CAPITAL LETTER AE WITH ACUTE] + case '\u1D01' : // ᴁ [LATIN LETTER SMALL CAPITAL AE] + output[outputPos++] = 'A'; + output[outputPos++] = 'E'; + break; + case '\uA734' : // Ꜵ [LATIN CAPITAL LETTER AO] + output[outputPos++] = 'A'; + output[outputPos++] = 'O'; + break; + 
case '\uA736' : // Ꜷ [LATIN CAPITAL LETTER AU] + output[outputPos++] = 'A'; + output[outputPos++] = 'U'; + break; + case '\uA738' : // Ꜹ [LATIN CAPITAL LETTER AV] + case '\uA73A' : // Ꜻ [LATIN CAPITAL LETTER AV WITH HORIZONTAL BAR] + output[outputPos++] = 'A'; + output[outputPos++] = 'V'; + break; + case '\uA73C' : // Ꜽ [LATIN CAPITAL LETTER AY] + output[outputPos++] = 'A'; + output[outputPos++] = 'Y'; + break; + case '\u249C' : // ⒜ [PARENTHESIZED LATIN SMALL LETTER A] + output[outputPos++] = '('; + output[outputPos++] = 'a'; + output[outputPos++] = ')'; + break; + case '\uA733' : // ꜳ [LATIN SMALL LETTER AA] + output[outputPos++] = 'a'; + output[outputPos++] = 'a'; + break; + case '\u00E6' : // æ [LATIN SMALL LETTER AE] + case '\u01E3' : // ǣ [LATIN SMALL LETTER AE WITH MACRON] + case '\u01FD' : // ǽ [LATIN SMALL LETTER AE WITH ACUTE] + case '\u1D02' : // ᴂ [LATIN SMALL LETTER TURNED AE] + output[outputPos++] = 'a'; + output[outputPos++] = 'e'; + break; + case '\uA735' : // ꜵ [LATIN SMALL LETTER AO] + output[outputPos++] = 'a'; + output[outputPos++] = 'o'; + break; + case '\uA737' : // ꜷ [LATIN SMALL LETTER AU] + output[outputPos++] = 'a'; + output[outputPos++] = 'u'; + break; + case '\uA739' : // ꜹ [LATIN SMALL LETTER AV] + case '\uA73B' : // ꜻ [LATIN SMALL LETTER AV WITH HORIZONTAL BAR] + output[outputPos++] = 'a'; + output[outputPos++] = 'v'; + break; + case '\uA73D' : // ꜽ [LATIN SMALL LETTER AY] + output[outputPos++] = 'a'; + output[outputPos++] = 'y'; + break; + case '\u0181' : // Ɓ [LATIN CAPITAL LETTER B WITH HOOK] + case '\u0182' : // Ƃ [LATIN CAPITAL LETTER B WITH TOPBAR] + case '\u0243' : // Ƀ [LATIN CAPITAL LETTER B WITH STROKE] + case '\u0299' : // ʙ [LATIN LETTER SMALL CAPITAL B] + case '\u1D03' : // ᴃ [LATIN LETTER SMALL CAPITAL BARRED B] + case '\u1E02' : // Ḃ [LATIN CAPITAL LETTER B WITH DOT ABOVE] + case '\u1E04' : // Ḅ [LATIN CAPITAL LETTER B WITH DOT BELOW] + case '\u1E06' : // Ḇ [LATIN CAPITAL LETTER B WITH LINE BELOW] + case '\u24B7' : // Ⓑ 
[CIRCLED LATIN CAPITAL LETTER B] + case '\uFF22' : // B [FULLWIDTH LATIN CAPITAL LETTER B] + output[outputPos++] = 'B'; + break; + case '\u0180' : // ƀ [LATIN SMALL LETTER B WITH STROKE] + case '\u0183' : // ƃ [LATIN SMALL LETTER B WITH TOPBAR] + case '\u0253' : // ɓ [LATIN SMALL LETTER B WITH HOOK] + case '\u1D6C' : // ᵬ [LATIN SMALL LETTER B WITH MIDDLE TILDE] + case '\u1D80' : // ᶀ [LATIN SMALL LETTER B WITH PALATAL HOOK] + case '\u1E03' : // ḃ [LATIN SMALL LETTER B WITH DOT ABOVE] + case '\u1E05' : // ḅ [LATIN SMALL LETTER B WITH DOT BELOW] + case '\u1E07' : // ḇ [LATIN SMALL LETTER B WITH LINE BELOW] + case '\u24D1' : // ⓑ [CIRCLED LATIN SMALL LETTER B] + case '\uFF42' : // b [FULLWIDTH LATIN SMALL LETTER B] + output[outputPos++] = 'b'; + break; + case '\u249D' : // ⒝ [PARENTHESIZED LATIN SMALL LETTER B] + output[outputPos++] = '('; + output[outputPos++] = 'b'; + output[outputPos++] = ')'; + break; + case '\u00C7' : // Ç [LATIN CAPITAL LETTER C WITH CEDILLA] + case '\u0106' : // Ć [LATIN CAPITAL LETTER C WITH ACUTE] + case '\u0108' : // Ĉ [LATIN CAPITAL LETTER C WITH CIRCUMFLEX] + case '\u010A' : // Ċ [LATIN CAPITAL LETTER C WITH DOT ABOVE] + case '\u010C' : // Č [LATIN CAPITAL LETTER C WITH CARON] + case '\u0187' : // Ƈ [LATIN CAPITAL LETTER C WITH HOOK] + case '\u023B' : // Ȼ [LATIN CAPITAL LETTER C WITH STROKE] + case '\u0297' : // ʗ [LATIN LETTER STRETCHED C] + case '\u1D04' : // ᴄ [LATIN LETTER SMALL CAPITAL C] + case '\u1E08' : // Ḉ [LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE] + case '\u24B8' : // Ⓒ [CIRCLED LATIN CAPITAL LETTER C] + case '\uFF23' : // C [FULLWIDTH LATIN CAPITAL LETTER C] + output[outputPos++] = 'C'; + break; + case '\u00E7' : // ç [LATIN SMALL LETTER C WITH CEDILLA] + case '\u0107' : // ć [LATIN SMALL LETTER C WITH ACUTE] + case '\u0109' : // ĉ [LATIN SMALL LETTER C WITH CIRCUMFLEX] + case '\u010B' : // ċ [LATIN SMALL LETTER C WITH DOT ABOVE] + case '\u010D' : // č [LATIN SMALL LETTER C WITH CARON] + case '\u0188' : // ƈ [LATIN SMALL 
LETTER C WITH HOOK] + case '\u023C' : // ȼ [LATIN SMALL LETTER C WITH STROKE] + case '\u0255' : // ɕ [LATIN SMALL LETTER C WITH CURL] + case '\u1E09' : // ḉ [LATIN SMALL LETTER C WITH CEDILLA AND ACUTE] + case '\u2184' : // ↄ [LATIN SMALL LETTER REVERSED C] + case '\u24D2' : // ⓒ [CIRCLED LATIN SMALL LETTER C] + case '\uA73E' : // Ꜿ [LATIN CAPITAL LETTER REVERSED C WITH DOT] + case '\uA73F' : // ꜿ [LATIN SMALL LETTER REVERSED C WITH DOT] + case '\uFF43' : // c [FULLWIDTH LATIN SMALL LETTER C] + output[outputPos++] = 'c'; + break; + case '\u249E' : // ⒞ [PARENTHESIZED LATIN SMALL LETTER C] + output[outputPos++] = '('; + output[outputPos++] = 'c'; + output[outputPos++] = ')'; + break; + case '\u00D0' : // Ð [LATIN CAPITAL LETTER ETH] + case '\u010E' : // Ď [LATIN CAPITAL LETTER D WITH CARON] + case '\u0110' : // Đ [LATIN CAPITAL LETTER D WITH STROKE] + case '\u0189' : // Ɖ [LATIN CAPITAL LETTER AFRICAN D] + case '\u018A' : // Ɗ [LATIN CAPITAL LETTER D WITH HOOK] + case '\u018B' : // Ƌ [LATIN CAPITAL LETTER D WITH TOPBAR] + case '\u1D05' : // ᴅ [LATIN LETTER SMALL CAPITAL D] + case '\u1D06' : // ᴆ [LATIN LETTER SMALL CAPITAL ETH] + case '\u1E0A' : // Ḋ [LATIN CAPITAL LETTER D WITH DOT ABOVE] + case '\u1E0C' : // Ḍ [LATIN CAPITAL LETTER D WITH DOT BELOW] + case '\u1E0E' : // Ḏ [LATIN CAPITAL LETTER D WITH LINE BELOW] + case '\u1E10' : // Ḑ [LATIN CAPITAL LETTER D WITH CEDILLA] + case '\u1E12' : // Ḓ [LATIN CAPITAL LETTER D WITH CIRCUMFLEX BELOW] + case '\u24B9' : // Ⓓ [CIRCLED LATIN CAPITAL LETTER D] + case '\uA779' : // Ꝺ [LATIN CAPITAL LETTER INSULAR D] + case '\uFF24' : // D [FULLWIDTH LATIN CAPITAL LETTER D] + output[outputPos++] = 'D'; + break; + case '\u00F0' : // ð [LATIN SMALL LETTER ETH] + case '\u010F' : // ď [LATIN SMALL LETTER D WITH CARON] + case '\u0111' : // đ [LATIN SMALL LETTER D WITH STROKE] + case '\u018C' : // ƌ [LATIN SMALL LETTER D WITH TOPBAR] + case '\u0221' : // ȡ [LATIN SMALL LETTER D WITH CURL] + case '\u0256' : // ɖ [LATIN SMALL LETTER D 
WITH TAIL] + case '\u0257' : // ɗ [LATIN SMALL LETTER D WITH HOOK] + case '\u1D6D' : // ᵭ [LATIN SMALL LETTER D WITH MIDDLE TILDE] + case '\u1D81' : // ᶁ [LATIN SMALL LETTER D WITH PALATAL HOOK] + case '\u1D91' : // ᶑ [LATIN SMALL LETTER D WITH HOOK AND TAIL] + case '\u1E0B' : // ḋ [LATIN SMALL LETTER D WITH DOT ABOVE] + case '\u1E0D' : // ḍ [LATIN SMALL LETTER D WITH DOT BELOW] + case '\u1E0F' : // ḏ [LATIN SMALL LETTER D WITH LINE BELOW] + case '\u1E11' : // ḑ [LATIN SMALL LETTER D WITH CEDILLA] + case '\u1E13' : // ḓ [LATIN SMALL LETTER D WITH CIRCUMFLEX BELOW] + case '\u24D3' : // ⓓ [CIRCLED LATIN SMALL LETTER D] + case '\uA77A' : // ꝺ [LATIN SMALL LETTER INSULAR D] + case '\uFF44' : // d [FULLWIDTH LATIN SMALL LETTER D] + output[outputPos++] = 'd'; + break; + case '\u01C4' : // DŽ [LATIN CAPITAL LETTER DZ WITH CARON] + case '\u01F1' : // DZ [LATIN CAPITAL LETTER DZ] + output[outputPos++] = 'D'; + output[outputPos++] = 'Z'; + break; + case '\u01C5' : // Dž [LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON] + case '\u01F2' : // Dz [LATIN CAPITAL LETTER D WITH SMALL LETTER Z] + output[outputPos++] = 'D'; + output[outputPos++] = 'z'; + break; + case '\u249F' : // ⒟ [PARENTHESIZED LATIN SMALL LETTER D] + output[outputPos++] = '('; + output[outputPos++] = 'd'; + output[outputPos++] = ')'; + break; + case '\u0238' : // ȸ [LATIN SMALL LETTER DB DIGRAPH] + output[outputPos++] = 'd'; + output[outputPos++] = 'b'; + break; + case '\u01C6' : // dž [LATIN SMALL LETTER DZ WITH CARON] + case '\u01F3' : // dz [LATIN SMALL LETTER DZ] + case '\u02A3' : // ʣ [LATIN SMALL LETTER DZ DIGRAPH] + case '\u02A5' : // ʥ [LATIN SMALL LETTER DZ DIGRAPH WITH CURL] + output[outputPos++] = 'd'; + output[outputPos++] = 'z'; + break; + case '\u00C8' : // È [LATIN CAPITAL LETTER E WITH GRAVE] + case '\u00C9' : // É [LATIN CAPITAL LETTER E WITH ACUTE] + case '\u00CA' : // Ê [LATIN CAPITAL LETTER E WITH CIRCUMFLEX] + case '\u00CB' : // Ë [LATIN CAPITAL LETTER E WITH DIAERESIS] + case '\u0112' 
: // Ē [LATIN CAPITAL LETTER E WITH MACRON] + case '\u0114' : // Ĕ [LATIN CAPITAL LETTER E WITH BREVE] + case '\u0116' : // Ė [LATIN CAPITAL LETTER E WITH DOT ABOVE] + case '\u0118' : // Ę [LATIN CAPITAL LETTER E WITH OGONEK] + case '\u011A' : // Ě [LATIN CAPITAL LETTER E WITH CARON] + case '\u018E' : // Ǝ [LATIN CAPITAL LETTER REVERSED E] + case '\u0190' : // Ɛ [LATIN CAPITAL LETTER OPEN E] + case '\u0204' : // Ȅ [LATIN CAPITAL LETTER E WITH DOUBLE GRAVE] + case '\u0206' : // Ȇ [LATIN CAPITAL LETTER E WITH INVERTED BREVE] + case '\u0228' : // Ȩ [LATIN CAPITAL LETTER E WITH CEDILLA] + case '\u0246' : // Ɇ [LATIN CAPITAL LETTER E WITH STROKE] + case '\u1D07' : // ᴇ [LATIN LETTER SMALL CAPITAL E] + case '\u1E14' : // Ḕ [LATIN CAPITAL LETTER E WITH MACRON AND GRAVE] + case '\u1E16' : // Ḗ [LATIN CAPITAL LETTER E WITH MACRON AND ACUTE] + case '\u1E18' : // Ḙ [LATIN CAPITAL LETTER E WITH CIRCUMFLEX BELOW] + case '\u1E1A' : // Ḛ [LATIN CAPITAL LETTER E WITH TILDE BELOW] + case '\u1E1C' : // Ḝ [LATIN CAPITAL LETTER E WITH CEDILLA AND BREVE] + case '\u1EB8' : // Ẹ [LATIN CAPITAL LETTER E WITH DOT BELOW] + case '\u1EBA' : // Ẻ [LATIN CAPITAL LETTER E WITH HOOK ABOVE] + case '\u1EBC' : // Ẽ [LATIN CAPITAL LETTER E WITH TILDE] + case '\u1EBE' : // Ế [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE] + case '\u1EC0' : // Ề [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE] + case '\u1EC2' : // Ể [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE] + case '\u1EC4' : // Ễ [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE] + case '\u1EC6' : // Ệ [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW] + case '\u24BA' : // Ⓔ [CIRCLED LATIN CAPITAL LETTER E] + case '\u2C7B' : // ⱻ [LATIN LETTER SMALL CAPITAL TURNED E] + case '\uFF25' : // E [FULLWIDTH LATIN CAPITAL LETTER E] + output[outputPos++] = 'E'; + break; + case '\u00E8' : // è [LATIN SMALL LETTER E WITH GRAVE] + case '\u00E9' : // é [LATIN SMALL LETTER E WITH ACUTE] + case '\u00EA' : // ê [LATIN SMALL LETTER E WITH 
CIRCUMFLEX] + case '\u00EB' : // ë [LATIN SMALL LETTER E WITH DIAERESIS] + case '\u0113' : // ē [LATIN SMALL LETTER E WITH MACRON] + case '\u0115' : // ĕ [LATIN SMALL LETTER E WITH BREVE] + case '\u0117' : // ė [LATIN SMALL LETTER E WITH DOT ABOVE] + case '\u0119' : // ę [LATIN SMALL LETTER E WITH OGONEK] + case '\u011B' : // ě [LATIN SMALL LETTER E WITH CARON] + case '\u01DD' : // ǝ [LATIN SMALL LETTER TURNED E] + case '\u0205' : // ȅ [LATIN SMALL LETTER E WITH DOUBLE GRAVE] + case '\u0207' : // ȇ [LATIN SMALL LETTER E WITH INVERTED BREVE] + case '\u0229' : // ȩ [LATIN SMALL LETTER E WITH CEDILLA] + case '\u0247' : // ɇ [LATIN SMALL LETTER E WITH STROKE] + case '\u0258' : // ɘ [LATIN SMALL LETTER REVERSED E] + case '\u025B' : // ɛ [LATIN SMALL LETTER OPEN E] + case '\u025C' : // ɜ [LATIN SMALL LETTER REVERSED OPEN E] + case '\u025D' : // ɝ [LATIN SMALL LETTER REVERSED OPEN E WITH HOOK] + case '\u025E' : // ɞ [LATIN SMALL LETTER CLOSED REVERSED OPEN E] + case '\u029A' : // ʚ [LATIN SMALL LETTER CLOSED OPEN E] + case '\u1D08' : // ᴈ [LATIN SMALL LETTER TURNED OPEN E] + case '\u1D92' : // ᶒ [LATIN SMALL LETTER E WITH RETROFLEX HOOK] + case '\u1D93' : // ᶓ [LATIN SMALL LETTER OPEN E WITH RETROFLEX HOOK] + case '\u1D94' : // ᶔ [LATIN SMALL LETTER REVERSED OPEN E WITH RETROFLEX HOOK] + case '\u1E15' : // ḕ [LATIN SMALL LETTER E WITH MACRON AND GRAVE] + case '\u1E17' : // ḗ [LATIN SMALL LETTER E WITH MACRON AND ACUTE] + case '\u1E19' : // ḙ [LATIN SMALL LETTER E WITH CIRCUMFLEX BELOW] + case '\u1E1B' : // ḛ [LATIN SMALL LETTER E WITH TILDE BELOW] + case '\u1E1D' : // ḝ [LATIN SMALL LETTER E WITH CEDILLA AND BREVE] + case '\u1EB9' : // ẹ [LATIN SMALL LETTER E WITH DOT BELOW] + case '\u1EBB' : // ẻ [LATIN SMALL LETTER E WITH HOOK ABOVE] + case '\u1EBD' : // ẽ [LATIN SMALL LETTER E WITH TILDE] + case '\u1EBF' : // ế [LATIN SMALL LETTER E WITH CIRCUMFLEX AND ACUTE] + case '\u1EC1' : // ề [LATIN SMALL LETTER E WITH CIRCUMFLEX AND GRAVE] + case '\u1EC3' : // ể [LATIN SMALL 
LETTER E WITH CIRCUMFLEX AND HOOK ABOVE] + case '\u1EC5' : // ễ [LATIN SMALL LETTER E WITH CIRCUMFLEX AND TILDE] + case '\u1EC7' : // ệ [LATIN SMALL LETTER E WITH CIRCUMFLEX AND DOT BELOW] + case '\u2091' : // ₑ [LATIN SUBSCRIPT SMALL LETTER E] + case '\u24D4' : // ⓔ [CIRCLED LATIN SMALL LETTER E] + case '\u2C78' : // ⱸ [LATIN SMALL LETTER E WITH NOTCH] + case '\uFF45' : // e [FULLWIDTH LATIN SMALL LETTER E] + output[outputPos++] = 'e'; + break; + case '\u24A0' : // ⒠ [PARENTHESIZED LATIN SMALL LETTER E] + output[outputPos++] = '('; + output[outputPos++] = 'e'; + output[outputPos++] = ')'; + break; + case '\u0191' : // Ƒ [LATIN CAPITAL LETTER F WITH HOOK] + case '\u1E1E' : // Ḟ [LATIN CAPITAL LETTER F WITH DOT ABOVE] + case '\u24BB' : // Ⓕ [CIRCLED LATIN CAPITAL LETTER F] + case '\uA730' : // ꜰ [LATIN LETTER SMALL CAPITAL F] + case '\uA77B' : // Ꝼ [LATIN CAPITAL LETTER INSULAR F] + case '\uA7FB' : // ꟻ [LATIN EPIGRAPHIC LETTER REVERSED F] + case '\uFF26' : // F [FULLWIDTH LATIN CAPITAL LETTER F] + output[outputPos++] = 'F'; + break; + case '\u0192' : // ƒ [LATIN SMALL LETTER F WITH HOOK] + case '\u1D6E' : // ᵮ [LATIN SMALL LETTER F WITH MIDDLE TILDE] + case '\u1D82' : // ᶂ [LATIN SMALL LETTER F WITH PALATAL HOOK] + case '\u1E1F' : // ḟ [LATIN SMALL LETTER F WITH DOT ABOVE] + case '\u1E9B' : // ẛ [LATIN SMALL LETTER LONG S WITH DOT ABOVE] + case '\u24D5' : // ⓕ [CIRCLED LATIN SMALL LETTER F] + case '\uA77C' : // ꝼ [LATIN SMALL LETTER INSULAR F] + case '\uFF46' : // f [FULLWIDTH LATIN SMALL LETTER F] + output[outputPos++] = 'f'; + break; + case '\u24A1' : // ⒡ [PARENTHESIZED LATIN SMALL LETTER F] + output[outputPos++] = '('; + output[outputPos++] = 'f'; + output[outputPos++] = ')'; + break; + case '\uFB00' : // ff [LATIN SMALL LIGATURE FF] + output[outputPos++] = 'f'; + output[outputPos++] = 'f'; + break; + case '\uFB03' : // ffi [LATIN SMALL LIGATURE FFI] + output[outputPos++] = 'f'; + output[outputPos++] = 'f'; + output[outputPos++] = 'i'; + break; + case '\uFB04' 
: // ffl [LATIN SMALL LIGATURE FFL] + output[outputPos++] = 'f'; + output[outputPos++] = 'f'; + output[outputPos++] = 'l'; + break; + case '\uFB01' : // fi [LATIN SMALL LIGATURE FI] + output[outputPos++] = 'f'; + output[outputPos++] = 'i'; + break; + case '\uFB02' : // fl [LATIN SMALL LIGATURE FL] + output[outputPos++] = 'f'; + output[outputPos++] = 'l'; + break; + case '\u011C' : // Ĝ [LATIN CAPITAL LETTER G WITH CIRCUMFLEX] + case '\u011E' : // Ğ [LATIN CAPITAL LETTER G WITH BREVE] + case '\u0120' : // Ġ [LATIN CAPITAL LETTER G WITH DOT ABOVE] + case '\u0122' : // Ģ [LATIN CAPITAL LETTER G WITH CEDILLA] + case '\u0193' : // Ɠ [LATIN CAPITAL LETTER G WITH HOOK] + case '\u01E4' : // Ǥ [LATIN CAPITAL LETTER G WITH STROKE] + case '\u01E5' : // ǥ [LATIN SMALL LETTER G WITH STROKE] + case '\u01E6' : // Ǧ [LATIN CAPITAL LETTER G WITH CARON] + case '\u01E7' : // ǧ [LATIN SMALL LETTER G WITH CARON] + case '\u01F4' : // Ǵ [LATIN CAPITAL LETTER G WITH ACUTE] + case '\u0262' : // ɢ [LATIN LETTER SMALL CAPITAL G] + case '\u029B' : // ʛ [LATIN LETTER SMALL CAPITAL G WITH HOOK] + case '\u1E20' : // Ḡ [LATIN CAPITAL LETTER G WITH MACRON] + case '\u24BC' : // Ⓖ [CIRCLED LATIN CAPITAL LETTER G] + case '\uA77D' : // Ᵹ [LATIN CAPITAL LETTER INSULAR G] + case '\uA77E' : // Ꝿ [LATIN CAPITAL LETTER TURNED INSULAR G] + case '\uFF27' : // G [FULLWIDTH LATIN CAPITAL LETTER G] + output[outputPos++] = 'G'; + break; + case '\u011D' : // ĝ [LATIN SMALL LETTER G WITH CIRCUMFLEX] + case '\u011F' : // ğ [LATIN SMALL LETTER G WITH BREVE] + case '\u0121' : // ġ [LATIN SMALL LETTER G WITH DOT ABOVE] + case '\u0123' : // ģ [LATIN SMALL LETTER G WITH CEDILLA] + case '\u01F5' : // ǵ [LATIN SMALL LETTER G WITH ACUTE] + case '\u0260' : // ɠ [LATIN SMALL LETTER G WITH HOOK] + case '\u0261' : // ɡ [LATIN SMALL LETTER SCRIPT G] + case '\u1D77' : // ᵷ [LATIN SMALL LETTER TURNED G] + case '\u1D79' : // ᵹ [LATIN SMALL LETTER INSULAR G] + case '\u1D83' : // ᶃ [LATIN SMALL LETTER G WITH PALATAL HOOK] + case 
'\u1E21' : // ḡ [LATIN SMALL LETTER G WITH MACRON] + case '\u24D6' : // ⓖ [CIRCLED LATIN SMALL LETTER G] + case '\uA77F' : // ꝿ [LATIN SMALL LETTER TURNED INSULAR G] + case '\uFF47' : // g [FULLWIDTH LATIN SMALL LETTER G] + output[outputPos++] = 'g'; + break; + case '\u24A2' : // ⒢ [PARENTHESIZED LATIN SMALL LETTER G] + output[outputPos++] = '('; + output[outputPos++] = 'g'; + output[outputPos++] = ')'; + break; + case '\u0124' : // Ĥ [LATIN CAPITAL LETTER H WITH CIRCUMFLEX] + case '\u0126' : // Ħ [LATIN CAPITAL LETTER H WITH STROKE] + case '\u021E' : // Ȟ [LATIN CAPITAL LETTER H WITH CARON] + case '\u029C' : // ʜ [LATIN LETTER SMALL CAPITAL H] + case '\u1E22' : // Ḣ [LATIN CAPITAL LETTER H WITH DOT ABOVE] + case '\u1E24' : // Ḥ [LATIN CAPITAL LETTER H WITH DOT BELOW] + case '\u1E26' : // Ḧ [LATIN CAPITAL LETTER H WITH DIAERESIS] + case '\u1E28' : // Ḩ [LATIN CAPITAL LETTER H WITH CEDILLA] + case '\u1E2A' : // Ḫ [LATIN CAPITAL LETTER H WITH BREVE BELOW] + case '\u24BD' : // Ⓗ [CIRCLED LATIN CAPITAL LETTER H] + case '\u2C67' : // Ⱨ [LATIN CAPITAL LETTER H WITH DESCENDER] + case '\u2C75' : // Ⱶ [LATIN CAPITAL LETTER HALF H] + case '\uFF28' : // H [FULLWIDTH LATIN CAPITAL LETTER H] + output[outputPos++] = 'H'; + break; + case '\u0125' : // ĥ [LATIN SMALL LETTER H WITH CIRCUMFLEX] + case '\u0127' : // ħ [LATIN SMALL LETTER H WITH STROKE] + case '\u021F' : // ȟ [LATIN SMALL LETTER H WITH CARON] + case '\u0265' : // ɥ [LATIN SMALL LETTER TURNED H] + case '\u0266' : // ɦ [LATIN SMALL LETTER H WITH HOOK] + case '\u02AE' : // ʮ [LATIN SMALL LETTER TURNED H WITH FISHHOOK] + case '\u02AF' : // ʯ [LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL] + case '\u1E23' : // ḣ [LATIN SMALL LETTER H WITH DOT ABOVE] + case '\u1E25' : // ḥ [LATIN SMALL LETTER H WITH DOT BELOW] + case '\u1E27' : // ḧ [LATIN SMALL LETTER H WITH DIAERESIS] + case '\u1E29' : // ḩ [LATIN SMALL LETTER H WITH CEDILLA] + case '\u1E2B' : // ḫ [LATIN SMALL LETTER H WITH BREVE BELOW] + case '\u1E96' : // ẖ [LATIN 
SMALL LETTER H WITH LINE BELOW] + case '\u24D7' : // ⓗ [CIRCLED LATIN SMALL LETTER H] + case '\u2C68' : // ⱨ [LATIN SMALL LETTER H WITH DESCENDER] + case '\u2C76' : // ⱶ [LATIN SMALL LETTER HALF H] + case '\uFF48' : // h [FULLWIDTH LATIN SMALL LETTER H] + output[outputPos++] = 'h'; + break; + case '\u01F6' : // Ƕ http://en.wikipedia.org/wiki/Hwair [LATIN CAPITAL LETTER HWAIR] + output[outputPos++] = 'H'; + output[outputPos++] = 'V'; + break; + case '\u24A3' : // ⒣ [PARENTHESIZED LATIN SMALL LETTER H] + output[outputPos++] = '('; + output[outputPos++] = 'h'; + output[outputPos++] = ')'; + break; + case '\u0195' : // ƕ [LATIN SMALL LETTER HV] + output[outputPos++] = 'h'; + output[outputPos++] = 'v'; + break; + case '\u00CC' : // Ì [LATIN CAPITAL LETTER I WITH GRAVE] + case '\u00CD' : // Í [LATIN CAPITAL LETTER I WITH ACUTE] + case '\u00CE' : // Î [LATIN CAPITAL LETTER I WITH CIRCUMFLEX] + case '\u00CF' : // Ï [LATIN CAPITAL LETTER I WITH DIAERESIS] + case '\u0128' : // Ĩ [LATIN CAPITAL LETTER I WITH TILDE] + case '\u012A' : // Ī [LATIN CAPITAL LETTER I WITH MACRON] + case '\u012C' : // Ĭ [LATIN CAPITAL LETTER I WITH BREVE] + case '\u012E' : // Į [LATIN CAPITAL LETTER I WITH OGONEK] + case '\u0130' : // İ [LATIN CAPITAL LETTER I WITH DOT ABOVE] + case '\u0196' : // Ɩ [LATIN CAPITAL LETTER IOTA] + case '\u0197' : // Ɨ [LATIN CAPITAL LETTER I WITH STROKE] + case '\u01CF' : // Ǐ [LATIN CAPITAL LETTER I WITH CARON] + case '\u0208' : // Ȉ [LATIN CAPITAL LETTER I WITH DOUBLE GRAVE] + case '\u020A' : // Ȋ [LATIN CAPITAL LETTER I WITH INVERTED BREVE] + case '\u026A' : // ɪ [LATIN LETTER SMALL CAPITAL I] + case '\u1D7B' : // ᵻ [LATIN SMALL CAPITAL LETTER I WITH STROKE] + case '\u1E2C' : // Ḭ [LATIN CAPITAL LETTER I WITH TILDE BELOW] + case '\u1E2E' : // Ḯ [LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE] + case '\u1EC8' : // Ỉ [LATIN CAPITAL LETTER I WITH HOOK ABOVE] + case '\u1ECA' : // Ị [LATIN CAPITAL LETTER I WITH DOT BELOW] + case '\u24BE' : // Ⓘ [CIRCLED LATIN CAPITAL 
LETTER I] + case '\uA7FE' : // ꟾ [LATIN EPIGRAPHIC LETTER I LONGA] + case '\uFF29' : // I [FULLWIDTH LATIN CAPITAL LETTER I] + output[outputPos++] = 'I'; + break; + case '\u00EC' : // ì [LATIN SMALL LETTER I WITH GRAVE] + case '\u00ED' : // í [LATIN SMALL LETTER I WITH ACUTE] + case '\u00EE' : // î [LATIN SMALL LETTER I WITH CIRCUMFLEX] + case '\u00EF' : // ï [LATIN SMALL LETTER I WITH DIAERESIS] + case '\u0129' : // ĩ [LATIN SMALL LETTER I WITH TILDE] + case '\u012B' : // ī [LATIN SMALL LETTER I WITH MACRON] + case '\u012D' : // ĭ [LATIN SMALL LETTER I WITH BREVE] + case '\u012F' : // į [LATIN SMALL LETTER I WITH OGONEK] + case '\u0131' : // ı [LATIN SMALL LETTER DOTLESS I] + case '\u01D0' : // ǐ [LATIN SMALL LETTER I WITH CARON] + case '\u0209' : // ȉ [LATIN SMALL LETTER I WITH DOUBLE GRAVE] + case '\u020B' : // ȋ [LATIN SMALL LETTER I WITH INVERTED BREVE] + case '\u0268' : // ɨ [LATIN SMALL LETTER I WITH STROKE] + case '\u1D09' : // ᴉ [LATIN SMALL LETTER TURNED I] + case '\u1D62' : // ᵢ [LATIN SUBSCRIPT SMALL LETTER I] + case '\u1D7C' : // ᵼ [LATIN SMALL LETTER IOTA WITH STROKE] + case '\u1D96' : // ᶖ [LATIN SMALL LETTER I WITH RETROFLEX HOOK] + case '\u1E2D' : // ḭ [LATIN SMALL LETTER I WITH TILDE BELOW] + case '\u1E2F' : // ḯ [LATIN SMALL LETTER I WITH DIAERESIS AND ACUTE] + case '\u1EC9' : // ỉ [LATIN SMALL LETTER I WITH HOOK ABOVE] + case '\u1ECB' : // ị [LATIN SMALL LETTER I WITH DOT BELOW] + case '\u2071' : // ⁱ [SUPERSCRIPT LATIN SMALL LETTER I] + case '\u24D8' : // ⓘ [CIRCLED LATIN SMALL LETTER I] + case '\uFF49' : // i [FULLWIDTH LATIN SMALL LETTER I] + output[outputPos++] = 'i'; + break; + case '\u0132' : // IJ [LATIN CAPITAL LIGATURE IJ] + output[outputPos++] = 'I'; + output[outputPos++] = 'J'; + break; + case '\u24A4' : // ⒤ [PARENTHESIZED LATIN SMALL LETTER I] + output[outputPos++] = '('; + output[outputPos++] = 'i'; + output[outputPos++] = ')'; + break; + case '\u0133' : // ij [LATIN SMALL LIGATURE IJ] + output[outputPos++] = 'i'; + 
output[outputPos++] = 'j'; + break; + case '\u0134' : // Ĵ [LATIN CAPITAL LETTER J WITH CIRCUMFLEX] + case '\u0248' : // Ɉ [LATIN CAPITAL LETTER J WITH STROKE] + case '\u1D0A' : // ᴊ [LATIN LETTER SMALL CAPITAL J] + case '\u24BF' : // Ⓙ [CIRCLED LATIN CAPITAL LETTER J] + case '\uFF2A' : // J [FULLWIDTH LATIN CAPITAL LETTER J] + output[outputPos++] = 'J'; + break; + case '\u0135' : // ĵ [LATIN SMALL LETTER J WITH CIRCUMFLEX] + case '\u01F0' : // ǰ [LATIN SMALL LETTER J WITH CARON] + case '\u0237' : // ȷ [LATIN SMALL LETTER DOTLESS J] + case '\u0249' : // ɉ [LATIN SMALL LETTER J WITH STROKE] + case '\u025F' : // ɟ [LATIN SMALL LETTER DOTLESS J WITH STROKE] + case '\u0284' : // ʄ [LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK] + case '\u029D' : // ʝ [LATIN SMALL LETTER J WITH CROSSED-TAIL] + case '\u24D9' : // ⓙ [CIRCLED LATIN SMALL LETTER J] + case '\u2C7C' : // ⱼ [LATIN SUBSCRIPT SMALL LETTER J] + case '\uFF4A' : // j [FULLWIDTH LATIN SMALL LETTER J] + output[outputPos++] = 'j'; + break; + case '\u24A5' : // ⒥ [PARENTHESIZED LATIN SMALL LETTER J] + output[outputPos++] = '('; + output[outputPos++] = 'j'; + output[outputPos++] = ')'; + break; + case '\u0136' : // Ķ [LATIN CAPITAL LETTER K WITH CEDILLA] + case '\u0198' : // Ƙ [LATIN CAPITAL LETTER K WITH HOOK] + case '\u01E8' : // Ǩ [LATIN CAPITAL LETTER K WITH CARON] + case '\u1D0B' : // ᴋ [LATIN LETTER SMALL CAPITAL K] + case '\u1E30' : // Ḱ [LATIN CAPITAL LETTER K WITH ACUTE] + case '\u1E32' : // Ḳ [LATIN CAPITAL LETTER K WITH DOT BELOW] + case '\u1E34' : // Ḵ [LATIN CAPITAL LETTER K WITH LINE BELOW] + case '\u24C0' : // Ⓚ [CIRCLED LATIN CAPITAL LETTER K] + case '\u2C69' : // Ⱪ [LATIN CAPITAL LETTER K WITH DESCENDER] + case '\uA740' : // Ꝁ [LATIN CAPITAL LETTER K WITH STROKE] + case '\uA742' : // Ꝃ [LATIN CAPITAL LETTER K WITH DIAGONAL STROKE] + case '\uA744' : // Ꝅ [LATIN CAPITAL LETTER K WITH STROKE AND DIAGONAL STROKE] + case '\uFF2B' : // K [FULLWIDTH LATIN CAPITAL LETTER K] + output[outputPos++] = 'K'; + 
break; + case '\u0137' : // ķ [LATIN SMALL LETTER K WITH CEDILLA] + case '\u0199' : // ƙ [LATIN SMALL LETTER K WITH HOOK] + case '\u01E9' : // ǩ [LATIN SMALL LETTER K WITH CARON] + case '\u029E' : // ʞ [LATIN SMALL LETTER TURNED K] + case '\u1D84' : // ᶄ [LATIN SMALL LETTER K WITH PALATAL HOOK] + case '\u1E31' : // ḱ [LATIN SMALL LETTER K WITH ACUTE] + case '\u1E33' : // ḳ [LATIN SMALL LETTER K WITH DOT BELOW] + case '\u1E35' : // ḵ [LATIN SMALL LETTER K WITH LINE BELOW] + case '\u24DA' : // ⓚ [CIRCLED LATIN SMALL LETTER K] + case '\u2C6A' : // ⱪ [LATIN SMALL LETTER K WITH DESCENDER] + case '\uA741' : // ꝁ [LATIN SMALL LETTER K WITH STROKE] + case '\uA743' : // ꝃ [LATIN SMALL LETTER K WITH DIAGONAL STROKE] + case '\uA745' : // ꝅ [LATIN SMALL LETTER K WITH STROKE AND DIAGONAL STROKE] + case '\uFF4B' : // k [FULLWIDTH LATIN SMALL LETTER K] + output[outputPos++] = 'k'; + break; + case '\u24A6' : // ⒦ [PARENTHESIZED LATIN SMALL LETTER K] + output[outputPos++] = '('; + output[outputPos++] = 'k'; + output[outputPos++] = ')'; + break; + case '\u0139' : // Ĺ [LATIN CAPITAL LETTER L WITH ACUTE] + case '\u013B' : // Ļ [LATIN CAPITAL LETTER L WITH CEDILLA] + case '\u013D' : // Ľ [LATIN CAPITAL LETTER L WITH CARON] + case '\u013F' : // Ŀ [LATIN CAPITAL LETTER L WITH MIDDLE DOT] + case '\u0141' : // Ł [LATIN CAPITAL LETTER L WITH STROKE] + case '\u023D' : // Ƚ [LATIN CAPITAL LETTER L WITH BAR] + case '\u029F' : // ʟ [LATIN LETTER SMALL CAPITAL L] + case '\u1D0C' : // ᴌ [LATIN LETTER SMALL CAPITAL L WITH STROKE] + case '\u1E36' : // Ḷ [LATIN CAPITAL LETTER L WITH DOT BELOW] + case '\u1E38' : // Ḹ [LATIN CAPITAL LETTER L WITH DOT BELOW AND MACRON] + case '\u1E3A' : // Ḻ [LATIN CAPITAL LETTER L WITH LINE BELOW] + case '\u1E3C' : // Ḽ [LATIN CAPITAL LETTER L WITH CIRCUMFLEX BELOW] + case '\u24C1' : // Ⓛ [CIRCLED LATIN CAPITAL LETTER L] + case '\u2C60' : // Ⱡ [LATIN CAPITAL LETTER L WITH DOUBLE BAR] + case '\u2C62' : // Ɫ [LATIN CAPITAL LETTER L WITH MIDDLE TILDE] + case '\uA746' : 
// Ꝇ [LATIN CAPITAL LETTER BROKEN L] + case '\uA748' : // Ꝉ [LATIN CAPITAL LETTER L WITH HIGH STROKE] + case '\uA780' : // Ꞁ [LATIN CAPITAL LETTER TURNED L] + case '\uFF2C' : // L [FULLWIDTH LATIN CAPITAL LETTER L] + output[outputPos++] = 'L'; + break; + case '\u013A' : // ĺ [LATIN SMALL LETTER L WITH ACUTE] + case '\u013C' : // ļ [LATIN SMALL LETTER L WITH CEDILLA] + case '\u013E' : // ľ [LATIN SMALL LETTER L WITH CARON] + case '\u0140' : // ŀ [LATIN SMALL LETTER L WITH MIDDLE DOT] + case '\u0142' : // ł [LATIN SMALL LETTER L WITH STROKE] + case '\u019A' : // ƚ [LATIN SMALL LETTER L WITH BAR] + case '\u0234' : // ȴ [LATIN SMALL LETTER L WITH CURL] + case '\u026B' : // ɫ [LATIN SMALL LETTER L WITH MIDDLE TILDE] + case '\u026C' : // ɬ [LATIN SMALL LETTER L WITH BELT] + case '\u026D' : // ɭ [LATIN SMALL LETTER L WITH RETROFLEX HOOK] + case '\u1D85' : // ᶅ [LATIN SMALL LETTER L WITH PALATAL HOOK] + case '\u1E37' : // ḷ [LATIN SMALL LETTER L WITH DOT BELOW] + case '\u1E39' : // ḹ [LATIN SMALL LETTER L WITH DOT BELOW AND MACRON] + case '\u1E3B' : // ḻ [LATIN SMALL LETTER L WITH LINE BELOW] + case '\u1E3D' : // ḽ [LATIN SMALL LETTER L WITH CIRCUMFLEX BELOW] + case '\u24DB' : // ⓛ [CIRCLED LATIN SMALL LETTER L] + case '\u2C61' : // ⱡ [LATIN SMALL LETTER L WITH DOUBLE BAR] + case '\uA747' : // ꝇ [LATIN SMALL LETTER BROKEN L] + case '\uA749' : // ꝉ [LATIN SMALL LETTER L WITH HIGH STROKE] + case '\uA781' : // ꞁ [LATIN SMALL LETTER TURNED L] + case '\uFF4C' : // l [FULLWIDTH LATIN SMALL LETTER L] + output[outputPos++] = 'l'; + break; + case '\u01C7' : // LJ [LATIN CAPITAL LETTER LJ] + output[outputPos++] = 'L'; + output[outputPos++] = 'J'; + break; + case '\u1EFA' : // Ỻ [LATIN CAPITAL LETTER MIDDLE-WELSH LL] + output[outputPos++] = 'L'; + output[outputPos++] = 'L'; + break; + case '\u01C8' : // Lj [LATIN CAPITAL LETTER L WITH SMALL LETTER J] + output[outputPos++] = 'L'; + output[outputPos++] = 'j'; + break; + case '\u24A7' : // ⒧ [PARENTHESIZED LATIN SMALL LETTER L] + 
output[outputPos++] = '('; + output[outputPos++] = 'l'; + output[outputPos++] = ')'; + break; + case '\u01C9' : // lj [LATIN SMALL LETTER LJ] + output[outputPos++] = 'l'; + output[outputPos++] = 'j'; + break; + case '\u1EFB' : // ỻ [LATIN SMALL LETTER MIDDLE-WELSH LL] + output[outputPos++] = 'l'; + output[outputPos++] = 'l'; + break; + case '\u02AA' : // ʪ [LATIN SMALL LETTER LS DIGRAPH] + output[outputPos++] = 'l'; + output[outputPos++] = 's'; + break; + case '\u02AB' : // ʫ [LATIN SMALL LETTER LZ DIGRAPH] + output[outputPos++] = 'l'; + output[outputPos++] = 'z'; + break; + case '\u019C' : // Ɯ [LATIN CAPITAL LETTER TURNED M] + case '\u1D0D' : // ᴍ [LATIN LETTER SMALL CAPITAL M] + case '\u1E3E' : // Ḿ [LATIN CAPITAL LETTER M WITH ACUTE] + case '\u1E40' : // Ṁ [LATIN CAPITAL LETTER M WITH DOT ABOVE] + case '\u1E42' : // Ṃ [LATIN CAPITAL LETTER M WITH DOT BELOW] + case '\u24C2' : // Ⓜ [CIRCLED LATIN CAPITAL LETTER M] + case '\u2C6E' : // Ɱ [LATIN CAPITAL LETTER M WITH HOOK] + case '\uA7FD' : // ꟽ [LATIN EPIGRAPHIC LETTER INVERTED M] + case '\uA7FF' : // ꟿ [LATIN EPIGRAPHIC LETTER ARCHAIC M] + case '\uFF2D' : // M [FULLWIDTH LATIN CAPITAL LETTER M] + output[outputPos++] = 'M'; + break; + case '\u026F' : // ɯ [LATIN SMALL LETTER TURNED M] + case '\u0270' : // ɰ [LATIN SMALL LETTER TURNED M WITH LONG LEG] + case '\u0271' : // ɱ [LATIN SMALL LETTER M WITH HOOK] + case '\u1D6F' : // ᵯ [LATIN SMALL LETTER M WITH MIDDLE TILDE] + case '\u1D86' : // ᶆ [LATIN SMALL LETTER M WITH PALATAL HOOK] + case '\u1E3F' : // ḿ [LATIN SMALL LETTER M WITH ACUTE] + case '\u1E41' : // ṁ [LATIN SMALL LETTER M WITH DOT ABOVE] + case '\u1E43' : // ṃ [LATIN SMALL LETTER M WITH DOT BELOW] + case '\u24DC' : // ⓜ [CIRCLED LATIN SMALL LETTER M] + case '\uFF4D' : // m [FULLWIDTH LATIN SMALL LETTER M] + output[outputPos++] = 'm'; + break; + case '\u24A8' : // ⒨ [PARENTHESIZED LATIN SMALL LETTER M] + output[outputPos++] = '('; + output[outputPos++] = 'm'; + output[outputPos++] = ')'; + break; + case 
'\u00D1' : // Ñ [LATIN CAPITAL LETTER N WITH TILDE] + case '\u0143' : // Ń [LATIN CAPITAL LETTER N WITH ACUTE] + case '\u0145' : // Ņ [LATIN CAPITAL LETTER N WITH CEDILLA] + case '\u0147' : // Ň [LATIN CAPITAL LETTER N WITH CARON] + case '\u014A' : // Ŋ http://en.wikipedia.org/wiki/Eng_(letter) [LATIN CAPITAL LETTER ENG] + case '\u019D' : // Ɲ [LATIN CAPITAL LETTER N WITH LEFT HOOK] + case '\u01F8' : // Ǹ [LATIN CAPITAL LETTER N WITH GRAVE] + case '\u0220' : // Ƞ [LATIN CAPITAL LETTER N WITH LONG RIGHT LEG] + case '\u0274' : // ɴ [LATIN LETTER SMALL CAPITAL N] + case '\u1D0E' : // ᴎ [LATIN LETTER SMALL CAPITAL REVERSED N] + case '\u1E44' : // Ṅ [LATIN CAPITAL LETTER N WITH DOT ABOVE] + case '\u1E46' : // Ṇ [LATIN CAPITAL LETTER N WITH DOT BELOW] + case '\u1E48' : // Ṉ [LATIN CAPITAL LETTER N WITH LINE BELOW] + case '\u1E4A' : // Ṋ [LATIN CAPITAL LETTER N WITH CIRCUMFLEX BELOW] + case '\u24C3' : // Ⓝ [CIRCLED LATIN CAPITAL LETTER N] + case '\uFF2E' : // N [FULLWIDTH LATIN CAPITAL LETTER N] + output[outputPos++] = 'N'; + break; + case '\u00F1' : // ñ [LATIN SMALL LETTER N WITH TILDE] + case '\u0144' : // ń [LATIN SMALL LETTER N WITH ACUTE] + case '\u0146' : // ņ [LATIN SMALL LETTER N WITH CEDILLA] + case '\u0148' : // ň [LATIN SMALL LETTER N WITH CARON] + case '\u0149' : // ʼn [LATIN SMALL LETTER N PRECEDED BY APOSTROPHE] + case '\u014B' : // ŋ http://en.wikipedia.org/wiki/Eng_(letter) [LATIN SMALL LETTER ENG] + case '\u019E' : // ƞ [LATIN SMALL LETTER N WITH LONG RIGHT LEG] + case '\u01F9' : // ǹ [LATIN SMALL LETTER N WITH GRAVE] + case '\u0235' : // ȵ [LATIN SMALL LETTER N WITH CURL] + case '\u0272' : // ɲ [LATIN SMALL LETTER N WITH LEFT HOOK] + case '\u0273' : // ɳ [LATIN SMALL LETTER N WITH RETROFLEX HOOK] + case '\u1D70' : // ᵰ [LATIN SMALL LETTER N WITH MIDDLE TILDE] + case '\u1D87' : // ᶇ [LATIN SMALL LETTER N WITH PALATAL HOOK] + case '\u1E45' : // ṅ [LATIN SMALL LETTER N WITH DOT ABOVE] + case '\u1E47' : // ṇ [LATIN SMALL LETTER N WITH DOT B... 
[truncated message content] |