Author: ianb
Date: 2004-04-14 01:30:33 -0600 (Wed, 14 Apr 2004)
New Revision: 60
Added:
Wiki/lib/htmldiff.py
Modified:
Wiki/Context/Main.py
Wiki/Context/default.css
Log:
Added difference checking between versions
Modified: Wiki/Context/Main.py
===================================================================
--- Wiki/Context/Main.py 2004-04-14 07:28:58 UTC (rev 59)
+++ Wiki/Context/Main.py 2004-04-14 07:30:33 UTC (rev 60)
@@ -1,5 +1,6 @@
from SitePage import *
from lib import wikipage
+from lib import htmldiff
class Main(SitePage):
@@ -14,7 +15,7 @@
if name != wikipage.canonicalName(name):
self.sendRedirectAndEnd(self.wiki.linkTo(wikipage.canonicalName(name)))
return
- version = req.field('version', None)
+ version = req.field('version', None) or None
self.page = self.wiki.page(name, version=version)
self.view = None
self.titlePrefix = ''
@@ -40,7 +41,7 @@
def actions(self):
return ['edit', 'preview', 'save', 'cancel', 'history',
- 'externalEdit', 'webdav', 'backlinks']
+ 'externalEdit', 'webdav', 'backlinks', 'diff']
def defaultAction(self):
self.view = 'writeRead'
@@ -80,6 +81,12 @@
self.view = 'writeBacklinks'
self.titlePrefix = 'Backlinks to: '
+ def diff(self):
+ otherVersion = self.request().field('otherVersion', None) or None
+ self.otherPage = self.wiki.page(self.page.name, version=otherVersion)
+ self.view = 'writeDiff'
+ self.titlePrefix = 'Diff %s to %s of:' % (self.page.version or 'Current', otherVersion or 'Current')
+
def writeContent(self):
if self.view:
getattr(self, self.view)()
@@ -150,7 +157,7 @@
assert metavars.has_key(name), "Bad name %r (must be one of %s)" % (name, ', '.join(metavars.keys()))
setattr(self.page, metavars[name], value)
self.page.text = '\n'.join(lines)
- self.page.notifyChange()
+ self.page.save()
def username(self):
req = self.request()
@@ -255,28 +262,38 @@
self.writeEdit()
def writeHistory(self):
+ self.write('<form action="%s" method="GET">'
+ % (self.link(unversioned=True)))
+ self.write('<input type="hidden" name="_action_diff" value="yes">\n')
self.write('''<table>
<tr class="header">
<th>Version</th>
<th>Created on</th>
<th>Log</th>
<th>User</th>
+ <th>Compare</th>
</tr>
''')
for index, page in enumerate(self.page.versions()):
self.write('<tr class="%s">\n' %
['odd', 'even'][index%2])
self.write('<td style="text-align: center"><a class="version" href="%s">%s</a></td>\n'
- % (page.link, page.version))
+ % (page.link, page.version or 'current'))
self.write('<td>%s</td>\n' % self.formatDate(page.modifiedDate, nonbreaking=True))
self.write('<td>%s</td>\n'
% self.htmlEncode(page.lastChangeLog))
self.write('<td>%s</td>\n'
% self.htmlEncode(page.lastChangeUser))
+ self.write('''
+ <td><input type="radio" name="version" value="%s">
+ <input type="radio" name="otherVersion" value="%s"></td>
+ ''' % (page.version, page.version))
self.write('</tr>\n')
self.write('</table>')
- self.write('<a href="%s">View %s</a><br>\n'
- % (self.link(), self.page.title))
+ self.write('<input type="submit" name="compare_thorough" value="Compare complete">\n')
+ self.write('<input type="submit" name="compare_brief" value="Compare content">\n')
+ self.write('<input type="submit" name="compare_source" value="Compare source">\n')
+ self.write('</form>\n')
def writeBacklinks(self):
self.write('<table>\n')
@@ -287,3 +304,33 @@
page.link,
page.title))
self.write('</table>')
+
+ def writeDiff(self):
+ compType = 'thorough'
+ for name in self.request().fields().keys():
+ if name.startswith('compare_'):
+ compType = name[len('compare_'):]
+ if compType == 'thorough':
+ Matcher = htmldiff.HTMLMatcher
+ source1 = self.page.html
+ source2 = self.otherPage.html
+ elif compType == 'brief':
+ Matcher = htmldiff.NoTagHTMLMatcher
+ source1 = self.page.html
+ source2 = self.otherPage.html
+ elif compType == 'source':
+ Matcher = htmldiff.TextMatcher
+ source1 = self.page.text
+ source2 = self.otherPage.text
+ else:
+ assert 0, "Unknown comparison type: %r" % compType
+ matcher = Matcher(source1, source2)
+ diff = matcher.htmlDiff()
+ start = 'version %s' % (self.page.version or 'Current')
+ end = 'version %s' % (self.otherPage.version or 'Current')
+ self.write('''
+ <p><span class="insert">Added to %s (present in %s)</span><br>
+ <span class="delete">Deleted from %s (present in %s)</span></p>
+ ''' % (start, end, end, start))
+ self.write(diff)
+
Modified: Wiki/Context/default.css
===================================================================
--- Wiki/Context/default.css 2004-04-14 07:28:58 UTC (rev 59)
+++ Wiki/Context/default.css 2004-04-14 07:30:33 UTC (rev 60)
@@ -131,7 +131,7 @@
h2 {
/* background-color: #666666; */
/* color: #ffffff; */
- border: medium solid black;
+ /* border: medium solid black; */
}
h3, h4, h5, h6 {
@@ -399,3 +399,11 @@
#searchbar tt {
color: #ddddff
}
+
+/* Diff results */
+
+.insert { background-color: #aaffaa }
+.delete { background-color: #ff8888 }
+.tagInsert { background-color: #004400; color: #ffffff }
+.tagDelete { background-color: #770000; color: #ffffff }
+.tagInsert tt { color: #99ff99 }
Added: Wiki/lib/htmldiff.py
===================================================================
--- Wiki/lib/htmldiff.py 2004-04-14 07:28:58 UTC (rev 59)
+++ Wiki/lib/htmldiff.py 2004-04-14 07:30:33 UTC (rev 60)
@@ -0,0 +1,261 @@
+#!/usr/bin/env python
+"""
+htmldiff.py
+(C) Ian Bicking <ianb@...>
+
+Finds the differences between two HTML files. *Not* line-by-line
+comparison (more word-by-word).
+
+Command-line usage:
+ ./htmldiff.py test1.html test2.html
+
+Better results if you use mxTidy first. The output is HTML.
+"""
+
+from difflib import SequenceMatcher
+import re
+try:
+ from cStringIO import StringIO
+except ImportError:
+ from StringIO import StringIO
+import cgi
+
+def htmlEncode(s, esc=cgi.escape):
+ return esc(s, 1)
+
+True, False = 1==1, 1==0
+
+commentRE = re.compile('<!--.*?-->', re.S)
+tagRE = re.compile('<.*?>', re.S)
+headRE = re.compile('<\s*head\s*>', re.S | re.I)
+
+class HTMLMatcher(SequenceMatcher):
+
+ def __init__(self, source1, source2):
+ SequenceMatcher.__init__(self, None, source1, source2)
+
+ def set_seq1(self, a):
+ SequenceMatcher.set_seq1(self, self.splitHTML(a))
+
+ def set_seq2(self, b):
+ SequenceMatcher.set_seq2(self, self.splitHTML(b))
+
+ def splitTags(self, t):
+ result = []
+ pos = 0
+ while 1:
+ match = tagRE.search(t, pos=pos)
+ if not match:
+ result.append(t[pos:])
+ break
+ result.append(t[pos:match.start()])
+ result.append(match.group(0))
+ pos = match.end()
+ return result
+
+ def splitWords(self, t):
+ return t.strip().split()
+
+ def splitHTML(self, t):
+ t = commentRE.sub('', t)
+ r = self.splitTags(t)
+ result = []
+ for item in r:
+ if item.startswith('<'):
+ result.append(item)
+ else:
+ result.extend(self.splitWords(item))
+ return result
+
+ def htmlDiff(self, addStylesheet=False):
+ opcodes = self.get_opcodes()
+ a = self.a
+ b = self.b
+ out = StringIO()
+ #print [o[0] for o in opcodes]
+ for tag, i1, i2, j1, j2 in opcodes:
+ if tag == 'equal':
+ for item in a[i1:i2]:
+ out.write(item)
+ out.write(' ')
+ if tag == 'delete' or tag == 'replace':
+ self.textDelete(a[i1:i2], out)
+ if tag == 'insert' or tag == 'replace':
+ self.textInsert(b[j1:j2], out)
+ html = out.getvalue()
+ out.close()
+ if addStylesheet:
+ html = self.addStylesheet(html, self.stylesheet())
+ return html
+
+ def textDelete(self, lst, out):
+ inSpan = False
+ for item in lst:
+ if item.startswith('<'):
+ if inSpan:
+ out.write(self.endDeleteText())
+ inSpan = False
+ out.write(self.formatDeleteTag(item))
+ else:
+ if not inSpan:
+ out.write(self.startDeleteText())
+ inSpan = True
+ out.write(item)
+ out.write(' ')
+ if inSpan:
+ out.write(self.endDeleteText())
+
+ def textInsert(self, lst, out):
+ inSpan = False
+ for item in lst:
+ if item.startswith('<'):
+ if inSpan:
+ out.write(self.endInsertText())
+ inSpan = False
+ out.write(self.formatInsertTag(item))
+ out.write(item)
+ out.write(' ')
+ else:
+ if not inSpan:
+ out.write(self.startInsertText())
+ inSpan = True
+ out.write(item)
+ out.write(' ')
+ if inSpan:
+ out.write(self.endInsertText())
+
+ def stylesheet(self):
+ return '''
+.insert { background-color: #aaffaa }
+.delete { background-color: #ff8888 }
+.tagInsert { background-color: #007700; color: #ffffff }
+.tagDelete { background-color: #770000; color: #ffffff }
+'''
+
+ def addStylesheet(self, html, ss):
+ match = headRE.search(html)
+ if match:
+ pos = match.end()
+ else:
+ pos = 0
+ return ('%s<style type="text/css"><!--\n%s\n--></style>%s'
+ % (html[:pos], ss, html[pos:]))
+
+ def startInsertText(self):
+ return '<span class="insert">'
+ def endInsertText(self):
+ return '</span> '
+ def startDeleteText(self):
+ return '<span class="delete">'
+ def endDeleteText(self):
+ return '</span> '
+ def formatInsertTag(self, tag):
+ return '<span class="tagInsert">insert: <tt>%s</tt></span> ' % htmlEncode(tag)
+ def formatDeleteTag(self, tag):
+ return '<span class="tagDelete">delete: <tt>%s</tt></span> ' % htmlEncode(tag)
+
+class NoTagHTMLMatcher(HTMLMatcher):
+ def formatInsertTag(self, tag):
+ return ''
+ def formatDeleteTag(self, tag):
+ return ''
+
+def htmldiff(source1, source2, addStylesheet=False):
+ """
+ Return the difference between two pieces of HTML
+
+ >>> htmldiff('test1', 'test2')
+ '<span class="delete">test1 </span> <span class="insert">test2 </span> '
+ >>> htmldiff('test1', 'test1')
+ 'test1 '
+ >>> htmldiff('<b>test1</b>', '<i>test1</i>')
+ '<span class="tagDelete">delete: <tt>&lt;b&gt;</tt></span> <span class="tagInsert">insert: <tt>&lt;i&gt;</tt></span> <i> test1 <span class="tagDelete">delete: <tt>&lt;/b&gt;</tt></span> <span class="tagInsert">insert: <tt>&lt;/i&gt;</tt></span> </i> '
+ """
+ h = HTMLMatcher(source1, source2)
+ return h.htmlDiff(addStylesheet)
+
+def diffFiles(f1, f2):
+ source1 = open(f1).read()
+ source2 = open(f2).read()
+ return htmldiff(source1, source2, True)
+
+class SimpleHTMLMatcher(HTMLMatcher):
+ """
+ Like HTMLMatcher, but returns a simpler diff
+ """
+ def startInsertText(self):
+ return '+['
+ def endInsertText(self):
+ return ']'
+ def startDeleteText(self):
+ return '-['
+ def endDeleteText(self):
+ return ']'
+ def formatInsertTag(self, tag):
+ return '+[%s]' % tag
+ def formatDeleteTag(self, tag):
+ return '-[%s]' % tag
+
+def simplehtmldiff(source1, source2):
+ """
+ Simpler form of htmldiff; mostly for testing, like:
+
+ >>> simplehtmldiff('test1', 'test2')
+ '-[test1 ]+[test2 ]'
+ >>> simplehtmldiff('<b>Hello world!</b>', '<i>Hello you!</i>')
+ '-[<b>]+[<i>]<i> Hello -[world! ]-[</b>]+[you! ]+[</i>]</i> '
+ """
+ h = SimpleHTMLMatcher(source1, source2)
+ return h.htmlDiff()
+
+class TextMatcher(HTMLMatcher):
+
+
+ def set_seq1(self, a):
+ SequenceMatcher.set_seq1(self, a.split('\n'))
+
+ def set_seq2(self, b):
+ SequenceMatcher.set_seq2(self, b.split('\n'))
+
+ def htmlDiff(self, addStylesheet=False):
+ opcodes = self.get_opcodes()
+ a = self.a
+ b = self.b
+ out = StringIO()
+ for tag, i1, i2, j1, j2 in opcodes:
+ if tag == 'equal':
+ self.writeLines(a[i1:i2], out)
+ if tag == 'delete' or tag == 'replace':
+ out.write(self.startDeleteText())
+ self.writeLines(a[i1:i2], out)
+ out.write(self.endDeleteText())
+ if tag == 'insert' or tag == 'replace':
+ out.write(self.startInsertText())
+ self.writeLines(b[j1:j2], out)
+ out.write(self.endInsertText())
+ html = out.getvalue()
+ out.close()
+ if addStylesheet:
+ html = self.addStylesheet(html, self.stylesheet())
+ return html
+
+ def writeLines(self, lines, out):
+ for line in lines:
+ line = htmlEncode(line)
+ line = line.replace('  ', '&nbsp; ')
+ line = line.replace('\t', '&nbsp; &nbsp; &nbsp; &nbsp; ')
+ if line.startswith(' '):
+ line = '&nbsp;' + line[1:]
+ out.write('<tt>%s</tt><br>\n' % line)
+
+if __name__ == '__main__':
+ import sys
+ if not sys.argv[1:]:
+ print "Usage: %s file1 file2" % sys.argv[0]
+ print "or to test: %s test" % sys.argv[0]
+ elif sys.argv[1] == 'test' and not sys.argv[2:]:
+ import doctest
+ doctest.testmod()
+ else:
+ print diffFiles(sys.argv[1], sys.argv[2])
+
Property changes on: Wiki/lib/htmldiff.py
___________________________________________________________________
Name: svn:executable
+ *
|