From: Cherniavsky B. <cb...@us...> - 2003-10-09 22:05:01
|
Update of /cvsroot/docutils/sandbox/cben/rolehack In directory sc8-pr-cvs1:/tmp/cvs-serv2444 Modified Files: imgmathhack.py mathhack.py rolehack.py Log Message: The usage of an inline role to generate display math was misguided. Added ability to preprocess simple directives and replaced the ``texdisplay`` role with a ``texmath::`` directive. Fixed some bugs with role parsing (the regexps now closely follow the spec). Index: imgmathhack.py =================================================================== RCS file: /cvsroot/docutils/sandbox/cben/rolehack/imgmathhack.py,v retrieving revision 1.2 retrieving revision 1.3 diff -u -d -r1.2 -r1.3 --- imgmathhack.py 22 Sep 2003 16:30:07 -0000 1.2 +++ imgmathhack.py 9 Oct 2003 22:04:56 -0000 1.3 @@ -1,11 +1,16 @@ #!/usr/bin/env python """ -Treat the default, ``texmath`` and ``texdisplay`` as LaTeX math and convert to -images. It sets the ``:align:`` image parameter differently on -``texdisplay``. +Convert latex math to images. Treats the default and ``texmath`` roles as +inline LaTeX math and the ``texmath::`` directive as display latex math. .. note:: - This runs external commands and leaves files after itself! You need: + This runs external commands and leaves files after itself! To reduce + running time when images are not changed and to reuse images for equal + fomulas, image names are md5 of the formula (hoping that no collisions + will happen) and images that already exist are not rebuilt. You should + clean the ``imgmath`` + + You'll need: - ``tex_to_images`` (part of ``festival``, does anybody know a tool that is more commonly availiable? It's a Perl script which could be asily @@ -18,41 +23,43 @@ - netpbm tools """ -import os, os.path +import os, os.path, md5 from rolehack import * class Tex_to_images(object): """Feeds math to ``tex_to_images``. Always goes through ppm.""" - def __init__(self, dir='./mathhack', out_pattern='mathhack_NNN', - options='-s 1.5', converter='pnmtopng', extension='.png'): + def __init__(self, dir='./imgmath', options='-s 1.5', + converter='pnmtopng', extension='.png'): try: os.mkdir(dir) except OSError: pass - self.counter = 1 self.options = options self.dir = dir - self.out_pattern = out_pattern self.converter = converter self.extension = extension def process(self, text): """Returns output filename.""" - self.fname = self.out_pattern.replace('NNN', str(self.counter)) - self.counter += 1 - fpath = self.fpath = os.path.join(self.dir, self.fname) - f = file(fpath, 'w') - f.write('@Start\n%s\n@End\n' % (text,)) - f.close() - os.system(('tex_to_images -f ppm -d %(dir)s -o %(fname)s.ppm ' - '%(options)s < %(fpath)s >& /dev/null' % vars(self))) - if self.converter: - os.system('%s < %s.ppm > %s%s' % - (self.converter, fpath, fpath, self.extension)) - fpath += self.extension - else: - fpath += '.ppm' - return fpath + dir = self.dir + extension = self.extension + options = self.options + converter = self.converter + fname = md5.new(text).hexdigest() + fpath = os.path.join(dir, fname) + if not os.path.exists(fpath + extension): + f = file(fpath, 'w') + f.write('@Start\n%s\n@End\n' % (text,)) + f.close() + os.system(('tex_to_images -f ppm -d %(dir)s -o %(fname)s.tmp ' + '%(options)s < %(fpath)s >& /dev/null' % vars())) + if self.converter: + os.system('%s < %s.tmp > %s%s' % + (self.converter, fpath, fpath, extension)) + else: + os.rename(fpath + '.tmp', fpath + '.ppm') + os.remove(fpath + '.tmp') + return fpath + extension def texmath(self, text): return 'image:: %s\n :align: middle\n' % (self.process(text),) def texdisplay(self, text): @@ -62,4 +69,4 @@ texmath = child.texmath texdisplay = child.texdisplay -main({'texmath': texmath, 'texdisplay': texdisplay}, default=texmath) +main({'texmath': texmath}, texmath, {'texmath': texdisplay}) Index: mathhack.py =================================================================== RCS file: /cvsroot/docutils/sandbox/cben/rolehack/mathhack.py,v retrieving revision 1.2 retrieving revision 1.3 diff -u -d -r1.2 -r1.3 --- mathhack.py 22 Sep 2003 15:23:45 -0000 1.2 +++ mathhack.py 9 Oct 2003 22:04:56 -0000 1.3 @@ -8,7 +8,7 @@ -- David Goodger. Convert the default and ``texmath`` role to raw latex inline math and the -``texdisplay`` role to display math. +``texmath`` directive to display math. """ from rolehack import * @@ -23,4 +23,4 @@ \[ ''', ' \]\n') -main({'texmath': texmath, 'texdisplay': texdisplay}, default=texmath) +main({'texmath': texmath}, texmath, {'texmath': texdisplay}) Index: rolehack.py =================================================================== RCS file: /cvsroot/docutils/sandbox/cben/rolehack/rolehack.py,v retrieving revision 1.1 retrieving revision 1.2 diff -u -d -r1.1 -r1.2 --- rolehack.py 18 Sep 2003 18:02:42 -0000 1.1 +++ rolehack.py 9 Oct 2003 22:04:56 -0000 1.2 @@ -1,16 +1,19 @@ """Preprocess reStructuredText roles to directives. -This is a stop-gap hack for \"adding\" inline syntaxes to reST, mainly useful +This is a stop-gap hack for prototyping new syntaxes for reST, mainly useful when you find yourself using directives every 3 lines. In such cases an extension to docutils allowing some inline syntax is desired thing and such extensions most probably will take the form of new interpretted text roles. This module allows to easily prototype them by converting given interpretted -text roles to directives. To avoid indentation- and line-related headache, -the uses of the roles are replaced with substitutions and all substitution -definitions are appeneded at the end of the document (hint: use ``replace::`` -if you don't want a directive). @@@ This has the weakness that all directives -become inline. +text roles to directives. To make them inline, the uses of the roles are +replaced with substitutions and all substitution definitions are appeneded at +the end of the document (hint: use ``replace::`` if you don't want a +directive). + +Since what's useful for inline syntaxes might also be useful outside of +paragraphs, preprocessing simple directives (only an argument, no options or +content) into other directives is also supported. I was too lazy to implement an elaborate command-line interface, so this is only a module. You should import it from a python script and call this module @@ -19,6 +22,9 @@ BUGS ==== +There are too many. Most can't be fixed here, the right thing is to extend +the docutils parser... + - Backslashes are not interpretted in any way (except that backticks preceded by backslashes are won't be treated as start/end of interpretted text). This means backslashes are passed to the directive which probably won't stay @@ -27,50 +33,77 @@ This bug is semi-intentional because it makes LaTeX generation easier... -- The default role is only recognized if preceded by any whitespace (including - '\n') or at start of document. - - Any number of lines is consumed in search for the closing backtick, disregarding indentation. The content is pasted into the directive as one line with normalized whitespace. -- Starting and ending contexts for inline markup recognition are not verified. - - The width of the substitution references is not equal to the original, so you can't use it in tables. - Long parts of the document without empty lines might cause ``recursion limit exceeded`` errors. +- Directives not recognized if preceded by non-whitespace (e.g. in a table). + """ import re # Named groups are used to allow simultaneous replacement of all roles. -_re_options = re.IGNORECASE | re.DOTALL | re.VERBOSE +_re_options = re.IGNORECASE | re.MULTILINE | re.DOTALL | re.VERBOSE -_default_re = r'''(?<![\`_:]) # not escaped, literal, target or prefix role - `(?P<DEFAULT> # store as ``DEFAULT`` - (?:[^`]|\\.)+)` # skip escaped chars - (?![_:]) # not reference or postfix role - ''' +def _role_re(group_name): + return r''' + # Start-string: + (?:^|(?<=\s|[\'"([{<\-/:])) + ` + (?=\S) + # Content: + (?P<%(group_name)s> + (?:[^`]|\\.)+ # skip escaped chars + ) + # End-string: + (?<=\S) + ` + (?=$|\s|[#\'")\]}>\-/:.,;!?\\]) + ''' % locals() -_empty_line_re = re.compile('\n[ \t]*\n') # NOT `re.VERBOSE` +_default_re = _role_re('_DEFAULT') def _role2regexp(role): """Return regexp for approximate recognition of `role`.""" - return r''':%(role)s: # prefix role - `(?P<prefix_%(role)s> # group names must differ - (?:[^`]|\\.)+)` # skip escaped chars - | - (?<![\`_:]) # not escaped, literal, target or prefix role - `(?P<postfix_%(role)s> # group names must differ - (?:[^`]|\\.)+)` # skip escaped chars - :%(role)s: # postfix role role - ''' % locals() + prefix_re = _role_re('prefix_' + role) + postfix_re = _role_re('postfix_' + role) + return r''' + :%(role)s: + %(prefix_re)s + | + %(postfix_re)s + :%(role)s: + ''' % locals() -def process(doc, roles, default=None): +def _dir2regexp(dir): + """Return regexp for approximate recognition of directive `dir`.""" + return r''' + ^(?P<indent_%(dir)s> [ \t]* ) # record indentation + \.\. \s+ + (?P<subst_%(dir)s> + ## (?:|[^|]*|)? # optional substitution + ) + \s* + %(dir)s \s* :: + (?P<argument_%(dir)s> + [^\n]* + (?: + \n + (?P=indent_%(dir)s) [ \t] # bigger indentation + [^\n]+ + )* + ) + ''' % locals() + +def process(doc, roles={}, default_role=None, directives={}): """Process `doc` replacing given `roles`. `doc` should be a single string containing the whole document. The @@ -82,17 +115,26 @@ return 'raw:: latex\n\n %s\n' % (text,) See `template()` for an easy way to create such trivial functions. The - optional `default` argument specifies a replacement for the default role. + optional `default_role` argument specifies a replacement for the default + role. + + The `directives` dictionary like `roles` but specifies directive names to + handle. The directive can have only an argument; substitution definitions + with these directives are also recognized. Indentation is adjusted + properly for directives. """ re_parts = [] repls = {} - if default: + if default_role: re_parts.append(_default_re) - repls['DEFAULT'] = default + repls['_DEFAULT'] = default_role for role, repl in roles.items(): re_parts.append(_role2regexp(role)) repls['prefix_' + role] = repls['postfix_' + role] = repl + for dir, repl in directives.items(): + re_parts.append(_dir2regexp(dir)) + repls['argument_' + dir] = repl full_re = '\n|'.join(re_parts) full_re = re.compile(full_re, _re_options) @@ -103,17 +145,26 @@ n += 1 ids = count() def dispatch(match): - role = match.lastgroup - content = ' '.join(match.group(role).split()) - id = ids.next() - subst = '|rolehack_%d|' % (id,) - after_output.append('.. %s %s' % (subst, repls[role](content))) - return subst + groupname = match.lastgroup + content = ' '.join(match.group(groupname).split()) + kind, name = groupname.split('_', 1) + if kind == 'argument': # substitution + indent = match.group('indent_' + name) + subst = match.group('subst_' + name) + repl = '\n.. %s %s' % (subst, repls[groupname](content)) + return indent + repl.replace('\n', '\n' + indent) + else: # role + id = ids.next() + subst = '|rolehack_%d|' % (id,) + repl = '.. %s %s' % (subst, repls[groupname](content)) + after_output.append(repl) + return subst # Hack: process by chunks separated by blank lines, trying to avoid # "recursion limit exceeded" errors. + empty_line_re = re.compile(r'\n[ \t]*\n') output = [full_re.sub(dispatch, chunk) - for chunk in _empty_line_re.split(doc)] + for chunk in empty_line_re.split(doc)] return '\n\n'.join(output + after_output) def template(pre, post): @@ -122,7 +173,7 @@ return ''.join((pre, text, post)) return repl -def main(roles, default=None): +def main(*args, **kw_args): """Simple command-line interface.""" import sys def parse_args(input='-', output='-'): @@ -134,7 +185,7 @@ output = sys.stdout else: output = file(output, 'w') - output.write(process(input.read(), roles, default)) + output.write(process(input.read(), *args, **kw_args)) parse_args(*sys.argv[1:]) ##main({'foo': template('foo::\n\n ', '\n')}, |