From: <pau...@us...> - 2011-11-04 01:09:52
|
Author: paultremblay Date: 2011-11-04 02:09:44 +0100 (Fri, 04 Nov 2011) New Revision: 7212 Modified: trunk/sandbox/paultremblay/other/asciitomathml.py trunk/sandbox/paultremblay/other/test_asciimath.py trunk/sandbox/paultremblay/other/utils.py Log: More support in math module. The utils changes include a simple way to copy a tree, if needed in XML writer. Modified: trunk/sandbox/paultremblay/other/asciitomathml.py =================================================================== --- trunk/sandbox/paultremblay/other/asciitomathml.py 2011-11-01 04:39:50 UTC (rev 7211) +++ trunk/sandbox/paultremblay/other/asciitomathml.py 2011-11-04 01:09:44 UTC (rev 7212) @@ -6,16 +6,28 @@ class AsciiMathML: - symbol_dict = { + greek_dict = { 'alpha': u"\u03B1", } + operator_dict = { + 'sum':u"\u2211", + u"\u2211":u"\u2211", + 'Sigma':u"\u2211", + } + symbol_dict = {} + symbol_dict.update(greek_dict) special_dict = { '(':{'type':'special'}, ')':{'type':'special'}, '/':{'type':'special'}, + '^':{'type':'special'}, + '_':{'type':'special'}, + } symbol_names = sorted(symbol_dict.keys(), key=lambda key_string: len(key_string), reverse=True) - special_names = ['(', ')', '/'] + special_names = sorted(special_dict.keys(), key=lambda key_string: len(key_string), reverse=True) + operator_names = sorted(operator_dict.keys(), key=lambda key_string: len(key_string), reverse=True) + # special_names = [ '(', ')', '/', '^', '_'] def __init__(self, output_encoding = 'utf8'): self.__number_re = re.compile('-?(\d+\.(\d+)?|\.?\d+)') @@ -64,7 +76,9 @@ return parent[counter - 1] def __get_last_element(self): - return self.__append_el[-1] + if len(self.__append_el) > 0: + return self.__append_el[-1] + return self.__append_el def __get_following_sibling(self, element, the_tree = 0): if the_tree == 0: @@ -102,8 +116,13 @@ parent = self.__get_parent(child = child, the_tree = the_tree) grandparent = self.__get_parent(child = parent, the_tree = the_tree) + def __is_parenthesis(self, element): + if element != None and element.tag == 'mfenced' and element.get('open') == '(' and\ + element.get('close') == ')': + return True - def __make_new_element(self, element, name, **attributes): + + def __change_element(self, element, name, **attributes): element.tag = name the_keys = element.attrib.keys() for the_key in the_keys: @@ -149,40 +168,80 @@ element = self.__make_element('mi', text=token) self.__append_el.append(element) - def __add_operator_to_tree(self, token, the_type): - element = self.__make_element('mo', text=token) + def __add_operator_to_tree(self, token, token_info): + if isinstance(token_info, dict): + text = token_info.get('symbol') + else: + text = token + element = self.__make_element('mo', text=text) self.__append_el.append(element) - def __add_special_to_tree(self, token, the_type): - if token == '(': - element = self.__make_element('mfenced', open='(', separators='') - self.__append_el.append(element) - self.__append_el = element - elif token == ')': - if self.__append_el.tag == 'mfenced' and self.__append_el.get('open') == '(': - self.__append_el.set('close', ')') - parent = self.__get_parent(self.__append_el) - self.__append_el = parent - else: - element = self.__make_element('mo', text=')') - self.__append_el.append(element) - elif token == '/': - last_element = self.__get_last_element() - if last_element != None and last_element.tag == 'mfenced' and last_element.get('open') == '(' and\ - last_element.get('close') == ')': - last_element.tag = 'mrow' - the_keys = last_element.attrib.keys() - for the_key in the_keys: - del(last_element.attrib[the_key]) + def __handle_binary(self, token, info): + last_element = self.__get_last_element() + if last_element == self.__append_el: # no "previous sibling," and can't process + self.__add_operator_to_tree(token, info) + return + if token == '/': + num_frac = 0 + if last_element.tag == 'mfrac': + for child in last_element: + if child.tag == 'mfrac': + num_frac +=1 + if num_frac % 2 != 0: + self.__append_el = last_element + last_element = self.__get_last_element() + if self.__is_parenthesis(last_element): + self.__change_element(last_element, 'mrow', **{'class':'nominator'}) nominator = deepcopy(last_element) - nominator.set('class', 'nominator') self.__append_el.remove(last_element) mfrac = self.__make_element('mfrac', nominator) self.__append_el.append(mfrac) self.__append_el = mfrac + elif token == '^' or token == '_': + if last_element.tag == 'msub': + subsup = self.__make_element('msubsup') + for child in last_element: # should be just 2--check? + element = deepcopy(child) + subsup.append(element) + self.__append_el.remove(last_element) + self.__append_el.append(subsup) + self.__append_el = subsup + else: + if token == '^': + el_name = 'msup' + elif token == '_': + el_name = 'msub' + base = deepcopy(last_element) + self.__append_el.remove(last_element) + base = self.__make_element(el_name, base) + self.__append_el.append(base) + self.__append_el = base + def __handle_open_parenthesis(self): + element = self.__make_element('mfenced', open='(', separators='', close="") + self.__append_el.append(element) + self.__append_el = element + def __handle_close_parenthesis(self): + if self.__append_el.tag == 'mfenced' and self.__append_el.get('open') == '(': + self.__append_el.set('close', ')') + parent = self.__get_parent(self.__append_el) + self.__append_el = parent + else: + element = self.__make_element('mo', text=')') + self.__append_el.append(element) + + def __add_special_to_tree(self, token, the_type): + if token == '(': + self.__handle_open_parenthesis() + elif token == ')': + self.__handle_close_parenthesis() + elif token == '/' or token == '^' or token == '_': + self.__handle_binary(token, the_type) + + + def parse_string(self, the_string): while the_string != '': the_string, token, token_info = self.__parse_tokens(the_string) @@ -199,19 +258,33 @@ elif the_type == 'symbol': self.__add_symbol_to_tree(token, token_info) elif the_type == 'operator': - self.__add_operator_to_tree(token, the_type) + self.__add_operator_to_tree(token, token_info) elif the_type == 'special': self.__add_special_to_tree(token, the_type) # for all elements - if self.__append_el.tag == 'mfrac': + if self.__append_el.tag == 'mfrac' or self.__append_el.tag == 'msup' or self.__append_el.tag == 'msub': last_element = self.__get_last_element() prev_sib = self.__get_previous_sibling(last_element) if prev_sib != None: - if last_element.tag == 'mfenced' and last_element.get('open') == '(' and\ - last_element.get('close') == ')': - self.__make_new_element(last_element, 'mrow', **{'class':'denominator'}) + if self.__is_parenthesis(last_element): + if self.__append_el.tag == 'mfrac': + the_dict = {'class':'denominator'} + elif self.__append_el.tag == 'msup': + the_dict = {'class':'superscript'} + elif self.__append_el.tag == 'msub': + the_dict = {'class':'subcript'} + self.__change_element(last_element, 'mrow', **the_dict) self.__append_el = self.__get_parent(self.__append_el) + elif self.__append_el.tag =='msubsup': + last_element = self.__get_last_element() + prev_sib = self.__get_previous_sibling(last_element) + prev_prev_sib =self.__get_previous_sibling(prev_sib) + if prev_prev_sib != None: + if self.__is_parenthesis(last_element): + the_dict = {'class':'subsuper'} + self.__change_element(last_element, 'mrow', **the_dict) + self.__append_el = self.__get_parent(self.__append_el) def __look_at_next_token(self, the_string): @@ -249,13 +322,19 @@ for name in self.special_names: if the_string.startswith(name): special = the_string[:len(name)] - info = self.special_dict[special] # do nothing with this for now + info = self.special_dict[special] return the_string[len(name):], special, info + for name in self.operator_names: # found special operator + if the_string.startswith(name): + symbol = the_string[:len(name)] + symbol = self.operator_dict[symbol] + return the_string[len(name):], name, {'type': 'operator', 'symbol': symbol} + for name in self.symbol_names: # found a special symbol if the_string.startswith(name): symbol = the_string[:len(name)] - symbol = self.symbol_dict[symbol] # do nothing with this for now + symbol = self.symbol_dict[symbol] return the_string[len(name):], name, {'type': 'symbol', 'symbol': symbol} # found either an operator or a letter Modified: trunk/sandbox/paultremblay/other/test_asciimath.py =================================================================== --- trunk/sandbox/paultremblay/other/test_asciimath.py 2011-11-01 04:39:50 UTC (rev 7211) +++ trunk/sandbox/paultremblay/other/test_asciimath.py 2011-11-04 01:09:44 UTC (rev 7212) @@ -90,7 +90,7 @@ xml_string = mathml_obj.to_xml_string() self.assertEquals(xml_string, result) - def test_to_xml_string_special(self): + def __test_to_xml_string_parenthesis(self): the_strings = [ ('(x)', '<math xmlns="http://www.w3.org/1998/Math/MathML"><mstyle><mfenced close=")" open="("><mi>x</mi></mfenced></mstyle></math>'), ('(x)x', '<math xmlns="http://www.w3.org/1998/Math/MathML"><mstyle><mfenced close=")" open="("><mi>x</mi></mfenced><mi>x</mi></mstyle></math>'), @@ -106,13 +106,12 @@ self.assertEquals(xml_string, result) def test_anything(self): - the_string = '(1 + 2)/2' + the_string = 'x^2' mathml_obj = asciitomathml.AsciiMathML(output_encoding='us-ascii') mathml_obj.parse_string(the_string) xml_string = mathml_obj.to_xml_string() - print xml_string - def test_fractons(self): + def test_fractions(self): the_strings = [ ('1/2', '<math xmlns="http://www.w3.org/1998/Math/MathML"><mstyle><mfrac><mn>1</mn><mn>2</mn></mfrac></mstyle></math>'), ('1/2 3', '<math xmlns="http://www.w3.org/1998/Math/MathML"><mstyle><mfrac><mn>1</mn><mn>2</mn></mfrac><mn>3</mn></mstyle></math>'), Modified: trunk/sandbox/paultremblay/other/utils.py =================================================================== --- trunk/sandbox/paultremblay/other/utils.py 2011-11-01 04:39:50 UTC (rev 7211) +++ trunk/sandbox/paultremblay/other/utils.py 2011-11-04 01:09:44 UTC (rev 7212) @@ -921,3 +921,46 @@ read_obj.close() docutils_tree = the_handle.get_tree() return docutils_tree + +import xml.dom.minidom +import xml.sax.saxutils + + """ + takes a dom element as current_element + + """ + +def start_tag(local_name): + sys.stdout.write('<%s>' % local_name) + +def end_tag(local_name): + sys.stdout.write('</%s>' % local_name) + +dom = xml.dom.minidom.parse('test.xml') +out_doc = xml.dom.minidom.Document() +def copy_tree(current_element): + elements = current_element.childNodes + for element in elements: + if element.nodeType == xml.dom.Node.ELEMENT_NODE: + element_name = element.localName + if element.attributes!= None: + for attr in element.attributes.values(): + ns = attr.namespaceURI + local_name = attr.localName + name = attr.name + value = attr.value + prefix = attr.prefix + new_att = out_doc.createAttribute(name ) + start_tag(element_name) + copy_tree(element) + end_tag(element_name) + elif element.nodeType == xml.dom.Node.TEXT_NODE: + parent = element.parentNode + if parent.localName == 'math': + sys.stdout.write(element.data) + else: + sys.stdout.write(element.data) + + + +copy_tree(dom) |