|
From: <fri...@us...> - 2008-12-23 17:37:42
|
Revision: 9629
http://zaf.svn.sourceforge.net/zaf/?rev=9629&view=rev
Author: friedelwolff
Date: 2008-12-23 17:37:30 +0000 (Tue, 23 Dec 2008)
Log Message:
-----------
Provide a sort key for Affix so that we don't need to override __cmp__. Sorting is very fast, and we actually remove all duplicates now. (There were thousands of duplicates left with the old implementation due to incorrect sorting.)
Modified Paths:
--------------
trunk/dict/zu/hunspell/hunspell_format.py
Modified: trunk/dict/zu/hunspell/hunspell_format.py
===================================================================
--- trunk/dict/zu/hunspell/hunspell_format.py 2008-12-23 15:23:29 UTC (rev 9628)
+++ trunk/dict/zu/hunspell/hunspell_format.py 2008-12-23 17:37:30 UTC (rev 9629)
@@ -118,19 +118,17 @@
def remove_duplicates(self):
"""Remove duplicate rules and return the trimmed rules list."""
- rules = self.rules[:]
+ rules = self.rules #XXX: not making a copy!
if not rules:
return rules
- rules.sort()
+ self.rules.sort(key=lambda x: x._sort_key())
- before = rules[0]
+ trimmed = [rules[0]]
for i in rules[1:]:
- if i == before:
- rules.remove(i)
- before = i
+ if i._sort_key() != trimmed[-1]._sort_key():
+ trimmed.append(i)
+ return trimmed
- return rules
-
def add_rule(self, **kwargs):
"""Inserts a new rule in this affix class"""
new_rule = Affix()
@@ -170,16 +168,13 @@
self.circumfix = False
self.morphology = ""
- def __cmp__(self, other):
- ret = 0
- for column in ("affix", "strip", "condition", "morphology"):
- a = getattr(self, column, "")
- b = getattr(other, column, "")
- ret = cmp(a, b)
- if ret:
- return ret
- return ret
+ def _sort_key(self):
+ """Return a key suitable for sorting"""
+ return (self.affix, self.strip, self.condition, self.morphology)
+# def __cmp__(self, other):
+# return cmp(self._sort_key(), other._sort_key())
+
def __str__(self):
return "(group=%s, strip=%s, affix=%s, condition=%s)" % \
(self.group.flag, self.strip, self.affix, self.condition)
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|