Update of /cvsroot/plone-docs/PloneBook/scripts In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv4038/scripts Added Files: Tag: html2rst.py html4-lat1.txt html4-special.txt html4-symbol.txt mapping.py rst2html.py stx2html.py tif2jpg.py Log Message: Second attempt at checking in images and scripts --- NEW FILE: html2rst.py --- #!/usr/bin/python2.3 #$Id: html2rst.py,v 1.1.2.1 2004/06/07 03:40:19 zopezen Exp $ #Copyright: ClearWind Consulting Ltd #License: http://www.clearwind.ca/license # converts the HTML output by OOo into # restructured text which should keep most people # happy # version 0.1 (my knowledge of restx is limited) import sys import pprint from HTMLParser import HTMLParser import StringIO try: from mapping import mapping, chars, cgi except ImportError: print "Run the script with the register option first" sys.exit() def normalise(data): if not data: data = '' data = data.replace('\n', ' ') return data class BookParser(HTMLParser): def __init__(self): HTMLParser.__init__(self) self._map = {} self._dest = None self._out = StringIO.StringIO() self._tag_stack = [] self._data_stack = [] self._method_history = [] def handle_starttag(self, tag, attrs): sig = str([tag, attrs]) mth = mapping.get(sig) # no method for this signature if not mth: return # we have a method name, but nobody wrote it method = getattr(self, mth, None) if not method: raise NotImplementedError, "No method written for: %s" % mth self._tag_stack.append((method, tag, attrs)) self._new_method_event(mth) self._data_stack.append('') def handle_data(self, data): if self._data_stack: self._data_stack[-1] = self._data_stack[-1] + data def handle_entityref(self, data): data = cgi.get("&%s;" % data) self._data_stack[-1] = self._data_stack[-1] + data def handle_endtag(self, tag): if self._tag_stack: t = self._tag_stack.pop() d = self._data_stack.pop() if self._tag_stack: # print the sucker into the tag # above, for example bold # goes into the paragraph tag self.setDest(self.handle_data) t[0](d) self.setDest(None) else: # print normally t[0](d) def _new_method_event(self, new): if self._method_history: last = self._method_history[-1] if last == 'code' and new != 'code': self.write() self._method_history.append(new) def setDest(self, dest=None): self._dest = dest def write(self, data=None, flag=None): # write into whatever dest maybe if data is None: data = '' if flag is None: data = normalise(data) if self._dest is None: self._out.write("%s\n" % data) else: self._dest(data) def someTitle(self, data, char): self.write(data) self.write(char * len(data)) self.write() def chapterTitle(self, data): self.someTitle(data, '-') def subChapterTitle(self, data): self.someTitle(data, '=') def quote(self, data): self.write(" %s" % data) self.write() def paragraph(self, data): self.write(data) self.write() def bold(self, data): if data.startswith('http://'): # we have a link self.write("`%s <%s>`_" % (data, data)) else: self.write("**%s**" % data) def italics(self, data): self.write("*%s*" % data) # underline for url's and other bits, which # dont really make sens in HTML def underline(self, data): self.italics(data) # make image link def img(self, data): self.write(".. image:: img/%s" % data) # each line of code gets its own code tag def code(self, data): if self._method_history and self._method_history[-2] != 'code': self._method_history.append('code') self.write('::|newline|', flag=1) # force new line self.write('%s' % data) def heading1(self, data): self.someTitle(data, '>') def heading2(self, data): self.someTitle(data, '<') def heading3(self, data): self.someTitle(data, '~') def heading4(self, data): self.someTitle(data, '.') def heading5(self, data): self.someTitle(data, ',') def heading6(self, data): self.someTitle(data, '#') def fixupHTML(data): # perform hacky fixups... import re image = re.compile('<I><B>Insert 3294f(.*).tif</B></I>') for g in image.findall(data): # convert into psuedo xml data = data.replace('<I><B>Insert 3294f%s.tif</B></I>' % g, '<img>3294f%s.png</img>' % g) # any single or multiple chars for k, v in chars.items(): data = data.replace(k, v) return data def fixupRST(data): add = """ .. include:: html4-lat1.txt """ for k, v in cgi.items(): data = data.replace(k, v) return data def convert(file): data = open(file, 'rb').read() data = fixupHTML(data) b = BookParser() b.feed(data) data = b._out.getvalue() print fixupRST(data) # go through and register every HTML element class SigParser(HTMLParser): def __init__(self): HTMLParser.__init__(self) self._map = {} def handle_starttag(self, tag, attrs): self._map[str([tag, attrs])] = None def registerSignatures(file): data = open(file, 'rb').read() b = SigParser() b.feed(data) print "mapping = {" pprint.pprint(b._map) if __name__=='__main__': if len(sys.argv) < 2: print "Usage: script file [register]" sys.exit(1) file = sys.argv[1] if len(sys.argv) > 2 and sys.argv[2] == 'register': registerSignatures(file) else: convert(file) --- NEW FILE: html4-lat1.txt --- .. |Aacute| unicode:: U+000C1 .. LATIN CAPITAL LETTER A WITH ACUTE .. |aacute| unicode:: U+000E1 .. LATIN SMALL LETTER A WITH ACUTE .. |Acirc| unicode:: U+000C2 .. LATIN CAPITAL LETTER A WITH CIRCUMFLEX .. |acirc| unicode:: U+000E2 .. LATIN SMALL LETTER A WITH CIRCUMFLEX .. |acute| unicode:: U+000B4 .. ACUTE ACCENT .. |AElig| unicode:: U+000C6 .. LATIN CAPITAL LETTER AE .. |aelig| unicode:: U+000E6 .. LATIN SMALL LETTER AE .. |Agrave| unicode:: U+000C0 .. LATIN CAPITAL LETTER A WITH GRAVE .. |agrave| unicode:: U+000E0 .. LATIN SMALL LETTER A WITH GRAVE .. |Aring| unicode:: U+000C5 .. LATIN CAPITAL LETTER A WITH RING ABOVE .. |aring| unicode:: U+000E5 .. LATIN SMALL LETTER A WITH RING ABOVE .. |Atilde| unicode:: U+000C3 .. LATIN CAPITAL LETTER A WITH TILDE .. |atilde| unicode:: U+000E3 .. LATIN SMALL LETTER A WITH TILDE .. |Auml| unicode:: U+000C4 .. LATIN CAPITAL LETTER A WITH DIAERESIS .. |auml| unicode:: U+000E4 .. LATIN SMALL LETTER A WITH DIAERESIS .. |brvbar| unicode:: U+000A6 .. BROKEN BAR .. |Ccedil| unicode:: U+000C7 .. LATIN CAPITAL LETTER C WITH CEDILLA .. |ccedil| unicode:: U+000E7 .. LATIN SMALL LETTER C WITH CEDILLA .. |cedil| unicode:: U+000B8 .. CEDILLA .. |cent| unicode:: U+000A2 .. CENT SIGN .. |copy| unicode:: U+000A9 .. COPYRIGHT SIGN .. |curren| unicode:: U+000A4 .. CURRENCY SIGN .. |deg| unicode:: U+000B0 .. DEGREE SIGN .. |divide| unicode:: U+000F7 .. DIVISION SIGN .. |Eacute| unicode:: U+000C9 .. LATIN CAPITAL LETTER E WITH ACUTE .. |eacute| unicode:: U+000E9 .. LATIN SMALL LETTER E WITH ACUTE .. |Ecirc| unicode:: U+000CA .. LATIN CAPITAL LETTER E WITH CIRCUMFLEX .. |ecirc| unicode:: U+000EA .. LATIN SMALL LETTER E WITH CIRCUMFLEX .. |Egrave| unicode:: U+000C8 .. LATIN CAPITAL LETTER E WITH GRAVE .. |egrave| unicode:: U+000E8 .. LATIN SMALL LETTER E WITH GRAVE .. |ETH| unicode:: U+000D0 .. LATIN CAPITAL LETTER ETH .. |eth| unicode:: U+000F0 .. LATIN SMALL LETTER ETH .. |Euml| unicode:: U+000CB .. LATIN CAPITAL LETTER E WITH DIAERESIS .. |euml| unicode:: U+000EB .. LATIN SMALL LETTER E WITH DIAERESIS .. |frac12| unicode:: U+000BD .. VULGAR FRACTION ONE HALF .. |frac14| unicode:: U+000BC .. VULGAR FRACTION ONE QUARTER .. |frac34| unicode:: U+000BE .. VULGAR FRACTION THREE QUARTERS .. |Iacute| unicode:: U+000CD .. LATIN CAPITAL LETTER I WITH ACUTE .. |iacute| unicode:: U+000ED .. LATIN SMALL LETTER I WITH ACUTE .. |Icirc| unicode:: U+000CE .. LATIN CAPITAL LETTER I WITH CIRCUMFLEX .. |icirc| unicode:: U+000EE .. LATIN SMALL LETTER I WITH CIRCUMFLEX .. |iexcl| unicode:: U+000A1 .. INVERTED EXCLAMATION MARK .. |Igrave| unicode:: U+000CC .. LATIN CAPITAL LETTER I WITH GRAVE .. |igrave| unicode:: U+000EC .. LATIN SMALL LETTER I WITH GRAVE .. |iquest| unicode:: U+000BF .. INVERTED QUESTION MARK .. |Iuml| unicode:: U+000CF .. LATIN CAPITAL LETTER I WITH DIAERESIS .. |iuml| unicode:: U+000EF .. LATIN SMALL LETTER I WITH DIAERESIS .. |laquo| unicode:: U+000AB .. LEFT-POINTING DOUBLE ANGLE QUOTATION MARK .. |macr| unicode:: U+000AF .. MACRON .. |micro| unicode:: U+000B5 .. MICRO SIGN .. |middot| unicode:: U+000B7 .. MIDDLE DOT .. |nbsp| unicode:: U+000A0 .. NO-BREAK SPACE .. |not| unicode:: U+000AC .. NOT SIGN .. |Ntilde| unicode:: U+000D1 .. LATIN CAPITAL LETTER N WITH TILDE .. |ntilde| unicode:: U+000F1 .. LATIN SMALL LETTER N WITH TILDE .. |Oacute| unicode:: U+000D3 .. LATIN CAPITAL LETTER O WITH ACUTE .. |oacute| unicode:: U+000F3 .. LATIN SMALL LETTER O WITH ACUTE .. |Ocirc| unicode:: U+000D4 .. LATIN CAPITAL LETTER O WITH CIRCUMFLEX .. |ocirc| unicode:: U+000F4 .. LATIN SMALL LETTER O WITH CIRCUMFLEX .. |Ograve| unicode:: U+000D2 .. LATIN CAPITAL LETTER O WITH GRAVE .. |ograve| unicode:: U+000F2 .. LATIN SMALL LETTER O WITH GRAVE .. |ordf| unicode:: U+000AA .. FEMININE ORDINAL INDICATOR .. |ordm| unicode:: U+000BA .. MASCULINE ORDINAL INDICATOR .. |Oslash| unicode:: U+000D8 .. LATIN CAPITAL LETTER O WITH STROKE .. |oslash| unicode:: U+000F8 .. LATIN SMALL LETTER O WITH STROKE .. |Otilde| unicode:: U+000D5 .. LATIN CAPITAL LETTER O WITH TILDE .. |otilde| unicode:: U+000F5 .. LATIN SMALL LETTER O WITH TILDE .. |Ouml| unicode:: U+000D6 .. LATIN CAPITAL LETTER O WITH DIAERESIS .. |ouml| unicode:: U+000F6 .. LATIN SMALL LETTER O WITH DIAERESIS .. |para| unicode:: U+000B6 .. PILCROW SIGN .. |plusmn| unicode:: U+000B1 .. PLUS-MINUS SIGN .. |pound| unicode:: U+000A3 .. POUND SIGN .. |raquo| unicode:: U+000BB .. RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK .. |reg| unicode:: U+000AE .. REGISTERED SIGN .. |sect| unicode:: U+000A7 .. SECTION SIGN .. |shy| unicode:: U+000AD .. SOFT HYPHEN .. |sup1| unicode:: U+000B9 .. SUPERSCRIPT ONE .. |sup2| unicode:: U+000B2 .. SUPERSCRIPT TWO .. |sup3| unicode:: U+000B3 .. SUPERSCRIPT THREE .. |szlig| unicode:: U+000DF .. LATIN SMALL LETTER SHARP S .. |THORN| unicode:: U+000DE .. LATIN CAPITAL LETTER THORN .. |thorn| unicode:: U+000FE .. LATIN SMALL LETTER THORN .. |times| unicode:: U+000D7 .. MULTIPLICATION SIGN .. |Uacute| unicode:: U+000DA .. LATIN CAPITAL LETTER U WITH ACUTE .. |uacute| unicode:: U+000FA .. LATIN SMALL LETTER U WITH ACUTE .. |Ucirc| unicode:: U+000DB .. LATIN CAPITAL LETTER U WITH CIRCUMFLEX .. |ucirc| unicode:: U+000FB .. LATIN SMALL LETTER U WITH CIRCUMFLEX .. |Ugrave| unicode:: U+000D9 .. LATIN CAPITAL LETTER U WITH GRAVE .. |ugrave| unicode:: U+000F9 .. LATIN SMALL LETTER U WITH GRAVE .. |uml| unicode:: U+000A8 .. DIAERESIS .. |Uuml| unicode:: U+000DC .. LATIN CAPITAL LETTER U WITH DIAERESIS .. |uuml| unicode:: U+000FC .. LATIN SMALL LETTER U WITH DIAERESIS .. |Yacute| unicode:: U+000DD .. LATIN CAPITAL LETTER Y WITH ACUTE .. |yacute| unicode:: U+000FD .. LATIN SMALL LETTER Y WITH ACUTE .. |yen| unicode:: U+000A5 .. YEN SIGN .. |yuml| unicode:: U+000FF .. LATIN SMALL LETTER Y WITH DIAERESIS --- NEW FILE: html4-special.txt --- .. |bdquo| unicode:: U+0201E .. DOUBLE LOW-9 QUOTATION MARK .. |circ| unicode:: U+002C6 .. MODIFIER LETTER CIRCUMFLEX ACCENT .. |Dagger| unicode:: U+02021 .. DOUBLE DAGGER .. |dagger| unicode:: U+02020 .. DAGGER .. |emsp| unicode:: U+02003 .. EM SPACE .. |ensp| unicode:: U+02002 .. EN SPACE .. |euro| unicode:: U+020AC .. EURO SIGN .. |gt| unicode:: U+0003E .. GREATER-THAN SIGN .. |ldquo| unicode:: U+0201C .. LEFT DOUBLE QUOTATION MARK .. |lrm| unicode:: U+0200E .. LEFT-TO-RIGHT MARK .. |lsaquo| unicode:: U+02039 .. SINGLE LEFT-POINTING ANGLE QUOTATION MARK .. |lsquo| unicode:: U+02018 .. LEFT SINGLE QUOTATION MARK .. |lt| unicode:: U+0003C .. LESS-THAN SIGN .. |mdash| unicode:: U+02014 .. EM DASH .. |ndash| unicode:: U+02013 .. EN DASH .. |OElig| unicode:: U+00152 .. LATIN CAPITAL LIGATURE OE .. |oelig| unicode:: U+00153 .. LATIN SMALL LIGATURE OE .. |permil| unicode:: U+02030 .. PER MILLE SIGN .. |quot| unicode:: U+00022 .. QUOTATION MARK .. |rdquo| unicode:: U+0201D .. RIGHT DOUBLE QUOTATION MARK .. |rlm| unicode:: U+0200F .. RIGHT-TO-LEFT MARK .. |rsaquo| unicode:: U+0203A .. SINGLE RIGHT-POINTING ANGLE QUOTATION MARK .. |rsquo| unicode:: U+02019 .. RIGHT SINGLE QUOTATION MARK .. |sbquo| unicode:: U+0201A .. SINGLE LOW-9 QUOTATION MARK .. |Scaron| unicode:: U+00160 .. LATIN CAPITAL LETTER S WITH CARON .. |scaron| unicode:: U+00161 .. LATIN SMALL LETTER S WITH CARON .. |thinsp| unicode:: U+02009 .. THIN SPACE .. |tilde| unicode:: U+002DC .. SMALL TILDE .. |Yuml| unicode:: U+00178 .. LATIN CAPITAL LETTER Y WITH DIAERESIS .. |zwj| unicode:: U+0200D .. ZERO WIDTH JOINER .. |zwnj| unicode:: U+0200C .. ZERO WIDTH NON-JOINER --- NEW FILE: html4-symbol.txt --- .. |alefsym| unicode:: U+02135 .. ALEF SYMBOL .. |Alpha| unicode:: U+00391 .. GREEK CAPITAL LETTER ALPHA .. |alpha| unicode:: U+003B1 .. GREEK SMALL LETTER ALPHA .. |and| unicode:: U+02227 .. LOGICAL AND .. |ang| unicode:: U+02220 .. ANGLE .. |asymp| unicode:: U+02248 .. ALMOST EQUAL TO .. |Beta| unicode:: U+00392 .. GREEK CAPITAL LETTER BETA .. |beta| unicode:: U+003B2 .. GREEK SMALL LETTER BETA .. |bull| unicode:: U+02022 .. BULLET .. |cap| unicode:: U+02229 .. INTERSECTION .. |Chi| unicode:: U+003A7 .. GREEK CAPITAL LETTER CHI .. |chi| unicode:: U+003C7 .. GREEK SMALL LETTER CHI .. |clubs| unicode:: U+02663 .. BLACK CLUB SUIT .. |cong| unicode:: U+02245 .. APPROXIMATELY EQUAL TO .. |crarr| unicode:: U+021B5 .. DOWNWARDS ARROW WITH CORNER LEFTWARDS .. |cup| unicode:: U+0222A .. UNION .. |dArr| unicode:: U+021D3 .. DOWNWARDS DOUBLE ARROW .. |darr| unicode:: U+02193 .. DOWNWARDS ARROW .. |Delta| unicode:: U+00394 .. GREEK CAPITAL LETTER DELTA .. |delta| unicode:: U+003B4 .. GREEK SMALL LETTER DELTA .. |diams| unicode:: U+02666 .. BLACK DIAMOND SUIT .. |empty| unicode:: U+02205 .. EMPTY SET .. |Epsilon| unicode:: U+00395 .. GREEK CAPITAL LETTER EPSILON .. |epsilon| unicode:: U+003B5 .. GREEK SMALL LETTER EPSILON .. |equiv| unicode:: U+02261 .. IDENTICAL TO .. |Eta| unicode:: U+00397 .. GREEK CAPITAL LETTER ETA .. |eta| unicode:: U+003B7 .. GREEK SMALL LETTER ETA .. |exist| unicode:: U+02203 .. THERE EXISTS .. |fnof| unicode:: U+00192 .. LATIN SMALL LETTER F WITH HOOK .. |forall| unicode:: U+02200 .. FOR ALL .. |frasl| unicode:: U+02044 .. FRACTION SLASH .. |Gamma| unicode:: U+00393 .. GREEK CAPITAL LETTER GAMMA .. |gamma| unicode:: U+003B3 .. GREEK SMALL LETTER GAMMA .. |ge| unicode:: U+02265 .. GREATER-THAN OR EQUAL TO .. |hArr| unicode:: U+021D4 .. LEFT RIGHT DOUBLE ARROW .. |harr| unicode:: U+02194 .. LEFT RIGHT ARROW .. |hearts| unicode:: U+02665 .. BLACK HEART SUIT .. |hellip| unicode:: U+02026 .. HORIZONTAL ELLIPSIS .. |image| unicode:: U+02111 .. BLACK-LETTER CAPITAL I .. |infin| unicode:: U+0221E .. INFINITY .. |int| unicode:: U+0222B .. INTEGRAL .. |Iota| unicode:: U+00399 .. GREEK CAPITAL LETTER IOTA .. |iota| unicode:: U+003B9 .. GREEK SMALL LETTER IOTA .. |isin| unicode:: U+02208 .. ELEMENT OF .. |Kappa| unicode:: U+0039A .. GREEK CAPITAL LETTER KAPPA .. |kappa| unicode:: U+003BA .. GREEK SMALL LETTER KAPPA .. |Lambda| unicode:: U+0039B .. GREEK CAPITAL LETTER LAMDA .. |lambda| unicode:: U+003BB .. GREEK SMALL LETTER LAMDA .. |lang| unicode:: U+02329 .. LEFT-POINTING ANGLE BRACKET .. |lArr| unicode:: U+021D0 .. LEFTWARDS DOUBLE ARROW .. |larr| unicode:: U+02190 .. LEFTWARDS ARROW .. |lceil| unicode:: U+02308 .. LEFT CEILING .. |le| unicode:: U+02264 .. LESS-THAN OR EQUAL TO .. |lfloor| unicode:: U+0230A .. LEFT FLOOR .. |lowast| unicode:: U+02217 .. ASTERISK OPERATOR .. |loz| unicode:: U+025CA .. LOZENGE .. |minus| unicode:: U+02212 .. MINUS SIGN .. |Mu| unicode:: U+0039C .. GREEK CAPITAL LETTER MU .. |mu| unicode:: U+003BC .. GREEK SMALL LETTER MU .. |nabla| unicode:: U+02207 .. NABLA .. |ne| unicode:: U+02260 .. NOT EQUAL TO .. |ni| unicode:: U+0220B .. CONTAINS AS MEMBER .. |notin| unicode:: U+02209 .. NOT AN ELEMENT OF .. |nsub| unicode:: U+02284 .. NOT A SUBSET OF .. |Nu| unicode:: U+0039D .. GREEK CAPITAL LETTER NU .. |nu| unicode:: U+003BD .. GREEK SMALL LETTER NU .. |oline| unicode:: U+0203E .. OVERLINE .. |Omega| unicode:: U+003A9 .. GREEK CAPITAL LETTER OMEGA .. |omega| unicode:: U+003C9 .. GREEK SMALL LETTER OMEGA .. |Omicron| unicode:: U+0039F .. GREEK CAPITAL LETTER OMICRON .. |omicron| unicode:: U+003BF .. GREEK SMALL LETTER OMICRON .. |oplus| unicode:: U+02295 .. CIRCLED PLUS .. |or| unicode:: U+02228 .. LOGICAL OR .. |otimes| unicode:: U+02297 .. CIRCLED TIMES .. |part| unicode:: U+02202 .. PARTIAL DIFFERENTIAL .. |perp| unicode:: U+022A5 .. UP TACK .. |Phi| unicode:: U+003A6 .. GREEK CAPITAL LETTER PHI .. |phi| unicode:: U+003D5 .. GREEK PHI SYMBOL .. |Pi| unicode:: U+003A0 .. GREEK CAPITAL LETTER PI .. |pi| unicode:: U+003C0 .. GREEK SMALL LETTER PI .. |piv| unicode:: U+003D6 .. GREEK PI SYMBOL .. |Prime| unicode:: U+02033 .. DOUBLE PRIME .. |prime| unicode:: U+02032 .. PRIME .. |prod| unicode:: U+0220F .. N-ARY PRODUCT .. |prop| unicode:: U+0221D .. PROPORTIONAL TO .. |Psi| unicode:: U+003A8 .. GREEK CAPITAL LETTER PSI .. |psi| unicode:: U+003C8 .. GREEK SMALL LETTER PSI .. |radic| unicode:: U+0221A .. SQUARE ROOT .. |rang| unicode:: U+0232A .. RIGHT-POINTING ANGLE BRACKET .. |rArr| unicode:: U+021D2 .. RIGHTWARDS DOUBLE ARROW .. |rarr| unicode:: U+02192 .. RIGHTWARDS ARROW .. |rceil| unicode:: U+02309 .. RIGHT CEILING .. |real| unicode:: U+0211C .. BLACK-LETTER CAPITAL R .. |rfloor| unicode:: U+0230B .. RIGHT FLOOR .. |Rho| unicode:: U+003A1 .. GREEK CAPITAL LETTER RHO .. |rho| unicode:: U+003C1 .. GREEK SMALL LETTER RHO .. |sdot| unicode:: U+022C5 .. DOT OPERATOR .. |Sigma| unicode:: U+003A3 .. GREEK CAPITAL LETTER SIGMA .. |sigma| unicode:: U+003C3 .. GREEK SMALL LETTER SIGMA .. |sigmaf| unicode:: U+003C2 .. GREEK SMALL LETTER FINAL SIGMA .. |sim| unicode:: U+0223C .. TILDE OPERATOR .. |spades| unicode:: U+02660 .. BLACK SPADE SUIT .. |sub| unicode:: U+02282 .. SUBSET OF .. |sube| unicode:: U+02286 .. SUBSET OF OR EQUAL TO .. |sum| unicode:: U+02211 .. N-ARY SUMMATION .. |sup| unicode:: U+02283 .. SUPERSET OF .. |supe| unicode:: U+02287 .. SUPERSET OF OR EQUAL TO .. |Tau| unicode:: U+003A4 .. GREEK CAPITAL LETTER TAU .. |tau| unicode:: U+003C4 .. GREEK SMALL LETTER TAU .. |there4| unicode:: U+02234 .. THEREFORE .. |Theta| unicode:: U+00398 .. GREEK CAPITAL LETTER THETA .. |theta| unicode:: U+003B8 .. GREEK SMALL LETTER THETA .. |thetasym| unicode:: U+003D1 .. GREEK THETA SYMBOL .. |trade| unicode:: U+02122 .. TRADE MARK SIGN .. |uArr| unicode:: U+021D1 .. UPWARDS DOUBLE ARROW .. |uarr| unicode:: U+02191 .. UPWARDS ARROW .. |upsih| unicode:: U+003D2 .. GREEK UPSILON WITH HOOK SYMBOL .. |Upsilon| unicode:: U+003A5 .. GREEK CAPITAL LETTER UPSILON .. |upsilon| unicode:: U+003C5 .. GREEK SMALL LETTER UPSILON .. |weierp| unicode:: U+02118 .. SCRIPT CAPITAL P .. |Xi| unicode:: U+0039E .. GREEK CAPITAL LETTER XI .. |xi| unicode:: U+003BE .. GREEK SMALL LETTER XI .. |Zeta| unicode:: U+00396 .. GREEK CAPITAL LETTER ZETA .. |zeta| unicode:: U+003B6 .. GREEK SMALL LETTER ZETA --- NEW FILE: mapping.py --- (This appears to be a binary file; contents omitted.) --- NEW FILE: rst2html.py --- #!/usr/bin/python2.3 #$Id: rst2html.py,v 1.1.2.1 2004/06/07 03:40:19 zopezen Exp $ #Copyright: ClearWind Consulting Ltd #License: http://www.clearwind.ca/license # converts the Rest output by html2tx into # html which should keep most people # happy # version 0.1 (my knowledge of restx is limited) import sys import os import docutils.core import docutils.io import StringIO from mapping import rchars def convert(file): outgoing = StringIO.StringIO() pub = docutils.core.Publisher() pub.set_reader('standalone', None, 'restructuredtext') pub.set_writer('html') pub.get_settings() pub.source = docutils.io.StringInput(open(file, 'rb').read(), 'iso8859-15') document = pub.reader.read(pub.source, pub.parser, pub.settings) pub.apply_transforms(document) pub.writer.write(document, outgoing) dest = os.path.splitext(file)[0] + '.html' destfh = open(dest, 'w') data = outgoing.getvalue() _data = [] for char in data: if ord(char) > 128: continue try: _data.append(str(char)) except UnicodeEncodeError: pass print "".join(_data) if __name__=='__main__': if len(sys.argv) < 2: print "Usage: script file" sys.exit(1) file = sys.argv[1] convert(file) --- NEW FILE: stx2html.py --- #!/usr/bin/python2.3 #$Id: stx2html.py,v 1.1.2.1 2004/06/07 03:40:19 zopezen Exp $ #Copyright: ClearWind Consulting Ltd #License: http://www.clearwind.ca/license # converts the Rest output by html2tx into # html which should keep most people # happy # version 0.1 (my knowledge of restx is limited) import sys import os import docutils.core import docutils.io import StringIO from mapping import rchars def convert(file): outgoing = StringIO.StringIO() pub = docutils.core.Publisher() pub.set_reader('standalone', None, 'restructuredtext') pub.set_writer('html') pub.get_settings() pub.source = docutils.io.StringInput(open(file, 'rb').read(), 'iso8859-15') document = pub.reader.read(pub.source, pub.parser, pub.settings) pub.apply_transforms(document) pub.writer.write(document, outgoing) dest = os.path.splitext(file)[0] + '.html' destfh = open(dest, 'w') data = outgoing.getvalue() _data = [] for char in data: if ord(char) > 128: continue try: _data.append(str(char)) except UnicodeEncodeError: pass print "".join(_data) if __name__=='__main__': if len(sys.argv) < 2: print "Usage: script file" sys.exit(1) file = sys.argv[1] convert(file) --- NEW FILE: tif2jpg.py --- #!/usr/bin/python #$Id: tif2jpg.py,v 1.1.2.1 2004/06/07 03:40:19 zopezen Exp $ #Copyright: ClearWind Consulting Ltd #License: http://www.clearwind.ca/license import sys import os import glob # Needs ImageMagick # most of the book pics, are tiff def convert(dr): x = r'%s/*.tif' % dr for file in glob.glob(x): i = file o = file[:-4] + '.png' print "Converting", i, o os.system('convert %s %s' % (i,o)) if __name__=='__main__': file = sys.argv[1] convert(file) |