From: <mi...@us...> - 2012-11-23 01:18:57
|
Revision: 7538 http://docutils.svn.sourceforge.net/docutils/?rev=7538&view=rev Author: milde Date: 2012-11-23 01:18:49 +0000 (Fri, 23 Nov 2012) Log Message: ----------- normalize_language_tag() now returns `BCP 47`_ conformant tags Subtags separated by ``-``, not ``_``. Modified Paths: -------------- trunk/docutils/HISTORY.txt trunk/docutils/docutils/languages/__init__.py trunk/docutils/docutils/parsers/rst/languages/__init__.py trunk/docutils/docutils/utils/__init__.py trunk/docutils/docutils/writers/latex2e/__init__.py trunk/docutils/docutils/writers/xetex/__init__.py trunk/docutils/test/test_utils.py Modified: trunk/docutils/HISTORY.txt =================================================================== --- trunk/docutils/HISTORY.txt 2012-11-18 22:11:49 UTC (rev 7537) +++ trunk/docutils/HISTORY.txt 2012-11-23 01:18:49 UTC (rev 7538) @@ -43,6 +43,11 @@ - Add SmartQuotes transform for typographic quotes and dashes. +* docutils/utils/__init__.py + + - normalize_language_tag() now returns `BCP 47`_ conformant tags + with subtags separated by ``-``. + * docutils/writers/html4css1/__init__.py - Use ``<code>`` tag for inline "code", Modified: trunk/docutils/docutils/languages/__init__.py =================================================================== --- trunk/docutils/docutils/languages/__init__.py 2012-11-18 22:11:49 UTC (rev 7537) +++ trunk/docutils/docutils/languages/__init__.py 2012-11-23 01:18:49 UTC (rev 7538) @@ -27,6 +27,7 @@ """ # TODO: use a dummy module returning emtpy strings?, configurable? for tag in normalize_language_tag(language_code): + tag = tag.replace('-','_') # '-' not valid in module names if tag in _languages: return _languages[tag] try: Modified: trunk/docutils/docutils/parsers/rst/languages/__init__.py =================================================================== --- trunk/docutils/docutils/parsers/rst/languages/__init__.py 2012-11-18 22:11:49 UTC (rev 7537) +++ trunk/docutils/docutils/parsers/rst/languages/__init__.py 2012-11-23 01:18:49 UTC (rev 7538) @@ -22,6 +22,7 @@ def get_language(language_code): for tag in normalize_language_tag(language_code): + tag = tag.replace('-','_') # '-' not valid in module names if tag in _languages: return _languages[tag] try: Modified: trunk/docutils/docutils/utils/__init__.py =================================================================== --- trunk/docutils/docutils/utils/__init__.py 2012-11-18 22:11:49 UTC (rev 7537) +++ trunk/docutils/docutils/utils/__init__.py 2012-11-23 01:18:49 UTC (rev 7538) @@ -12,6 +12,7 @@ import sys import os import os.path +import re import warnings import unicodedata from docutils import ApplicationError, DataError @@ -642,20 +643,20 @@ Example: - >>> normalize_language_tag('de-AT-1901') - ['de_at_1901', 'de_at', 'de_1901', 'de'] + >>> normalize_language_tag('de_AT-1901') + ['de-at-1901', 'de-at', 'de-1901', 'de'] """ # normalize: - tag = tag.lower().replace('-','_') + tag = tag.lower().replace('_','-') + # split (except singletons, which mark the following tag as non-standard): + tag = re.sub(r'-([a-zA-Z0-9])-', r'-\1_', tag) + taglist = [] + subtags = [subtag.replace('_', '-') for subtag in tag.split('-')] + base_tag = [subtags.pop(0)] # find all combinations of subtags - taglist = [] - base_tag= tag.split('_')[:1] - subtags = tag.split('_')[1:] - # print base_tag, subtags for n in range(len(subtags), 0, -1): for tags in unique_combinations(subtags, n): - # print tags - taglist.append('_'.join(base_tag + tags)) + taglist.append('-'.join(base_tag+tags)) taglist += base_tag return taglist Modified: trunk/docutils/docutils/writers/latex2e/__init__.py =================================================================== --- trunk/docutils/docutils/writers/latex2e/__init__.py 2012-11-18 22:11:49 UTC (rev 7537) +++ trunk/docutils/docutils/writers/latex2e/__init__.py 2012-11-23 01:18:49 UTC (rev 7538) @@ -293,18 +293,18 @@ 'cy': 'welsh', 'da': 'danish', 'de': 'ngerman', # new spelling (de_1996) - 'de_1901': 'german', # old spelling - 'de_at': 'naustrian', - 'de_at_1901': 'austrian', + 'de-1901': 'german', # old spelling + 'de-AT': 'naustrian', + 'de-AT-1901': 'austrian', 'dsb': 'lowersorbian', 'el': 'greek', # monotonic (el-monoton) - 'el_polyton': 'polutonikogreek', + 'el-polyton': 'polutonikogreek', 'en': 'english', # TeX' default language - 'en_au': 'australian', - 'en_ca': 'canadian', - 'en_gb': 'british', - 'en_nz': 'newzealand', - 'en_us': 'american', + 'en-AU': 'australian', + 'en-CA': 'canadian', + 'en-GB': 'british', + 'en-NZ': 'newzealand', + 'en-US': 'american', 'eo': 'esperanto', # '^' is active 'es': 'spanish', 'et': 'estonian', @@ -312,10 +312,10 @@ # 'fa': 'farsi', 'fi': 'finnish', 'fr': 'french', - 'fr_ca': 'canadien', + 'fr-CA': 'canadien', 'ga': 'irish', # Irish Gaelic # 'grc': # Ancient Greek - 'grc_ibycus': 'ibycus', # Ibycus encoding + 'grc-ibycus': 'ibycus', # Ibycus encoding 'gl': 'galician', 'he': 'hebrew', 'hr': 'croatian', @@ -338,24 +338,27 @@ 'no': 'norsk', # Norwegian Bokmal 'pl': 'polish', 'pt': 'portuges', - 'pt_br': 'brazil', + 'pt-BR': 'brazil', 'ro': 'romanian', 'ru': 'russian', # '"' is active 'se': 'samin', # North Sami - # sh-cyrl: Serbo-Croatian, Cyrillic script - 'sh-latn': 'serbian', # Serbo-Croatian, Latin script + # sh-Cyrl: Serbo-Croatian, Cyrillic script + 'sh-Latn': 'serbian', # Serbo-Croatian, Latin script 'sk': 'slovak', 'sl': 'slovene', 'sq': 'albanian', - # 'sr-cyrl': Serbian, Cyrillic script (sr-cyrl) - 'sr-latn': 'serbian', # Serbian, Latin script, " active. + # 'sr-Cyrl': Serbian, Cyrillic script (sr-cyrl) + 'sr-Latn': 'serbian', # Serbian, Latin script, " active. 'sv': 'swedish', # 'th': 'thai', 'tr': 'turkish', 'uk': 'ukrainian', 'vi': 'vietnam', - # zh-latn: Chinese Pinyin + # zh-Latn: Chinese Pinyin } + # normalize (downcase) keys + language_codes = dict([(k.lower(), v) for (k,v) in language_codes.items()]) + warn_msg = 'Language "%s" not supported by LaTeX (babel)' def __init__(self, language_code, reporter=None): @@ -1595,8 +1598,12 @@ self.out.append( '%\n\\begin{list}{}{}\n' ) else: self.out.append( '%\n\\begin{itemize}\n' ) + # if node['classes']: + # self.visit_inline(node) def depart_bullet_list(self, node): + # if node['classes']: + # self.depart_inline(node) if self.is_toc_list: self.out.append( '\n\\end{list}\n' ) else: Modified: trunk/docutils/docutils/writers/xetex/__init__.py =================================================================== --- trunk/docutils/docutils/writers/xetex/__init__.py 2012-11-18 22:11:49 UTC (rev 7537) +++ trunk/docutils/docutils/writers/xetex/__init__.py 2012-11-23 01:18:49 UTC (rev 7538) @@ -76,30 +76,33 @@ # code Polyglossia-name comment 'cop': 'coptic', 'de': 'german', # new spelling (de_1996) - 'de_1901': 'ogerman', # old spelling + 'de-1901': 'ogerman', # old spelling 'dv': 'divehi', # Maldivian 'dsb': 'lsorbian', - 'el_polyton': 'polygreek', + 'el-polyton': 'polygreek', 'fa': 'farsi', 'grc': 'ancientgreek', 'hsb': 'usorbian', - 'sh-cyrl': 'serbian', # Serbo-Croatian, Cyrillic script - 'sh-latn': 'croatian', # Serbo-Croatian, Latin script + 'sh-Cyrl': 'serbian', # Serbo-Croatian, Cyrillic script + 'sh-Latn': 'croatian', # Serbo-Croatian, Latin script 'sq': 'albanian', - 'sr': 'serbian', # Cyrillic script (sr-cyrl) + 'sr': 'serbian', # Cyrillic script (sr-Cyrl) 'th': 'thai', 'vi': 'vietnamese', - # zh-latn: ??? # Chinese Pinyin + # zh-Latn: ??? # Chinese Pinyin }) + # normalize (downcase) keys + language_codes = dict([(k.lower(), v) for (k,v) in language_codes.items()]) + # Languages without Polyglossia support: for key in ('af', # 'afrikaans', - 'de_at', # 'naustrian', - 'de_at_1901', # 'austrian', - 'fr_ca', # 'canadien', - 'grc_ibycus', # 'ibycus', (Greek Ibycus encoding) - 'sr-latn', # 'serbian script=latin' + 'de-AT', # 'naustrian', + 'de-AT-1901', # 'austrian', + 'fr-CA', # 'canadien', + 'grc-ibycus', # 'ibycus', (Greek Ibycus encoding) + 'sr-Latn', # 'serbian script=latin' ): - del(language_codes[key]) + del(language_codes[key.lower()]) def __init__(self, language_code, reporter): self.language_code = language_code Modified: trunk/docutils/test/test_utils.py =================================================================== --- trunk/docutils/test/test_utils.py 2012-11-18 22:11:49 UTC (rev 7537) +++ trunk/docutils/test/test_utils.py 2012-11-23 01:18:49 UTC (rev 7538) @@ -240,12 +240,15 @@ def test_normalize_language_tag(self): self.assertEqual(utils.normalize_language_tag('de'), ['de']) self.assertEqual(utils.normalize_language_tag('de-AT'), - ['de_at', 'de']) + ['de-at', 'de']) self.assertEqual(utils.normalize_language_tag('de-AT-1901'), - ['de_at_1901', 'de_at', 'de_1901', 'de']) + ['de-at-1901', 'de-at', 'de-1901', 'de']) self.assertEqual(utils.normalize_language_tag('de-AT-1901-frak'), - ['de_at_1901_frak', 'de_at_1901', 'de_at_frak', - 'de_1901_frak', 'de_at', 'de_1901', 'de_frak', 'de']) + ['de-at-1901-frak', 'de-at-1901', 'de-at-frak', + 'de-1901-frak', 'de-at', 'de-1901', 'de-frak', 'de']) + self.assertEqual(utils.normalize_language_tag('grc-ibycus-x-altquot'), + ['grc-ibycus-x-altquot', 'grc-ibycus', + 'grc-x-altquot', 'grc']) def test_column_width(self): self.assertEqual(utils.column_width(u'de'), 2) This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |