From: <fri...@us...> - 2008-01-29 16:07:52
|
Revision: 6962
          http://translate.svn.sourceforge.net/translate/?rev=6962&view=rev
Author:   friedelwolff
Date:     2008-01-29 08:07:51 -0800 (Tue, 29 Jan 2008)

Log Message:
-----------
Remove unnecessary import and commented code. Update comments.

Modified Paths:
--------------
    src/trunk/translate/search/terminology.py

Modified: src/trunk/translate/search/terminology.py
===================================================================
--- src/trunk/translate/search/terminology.py 2008-01-29 16:05:37 UTC (rev 6961)
+++ src/trunk/translate/search/terminology.py 2008-01-29 16:07:51 UTC (rev 6962)
@@ -20,7 +20,6 @@
 
 """A class that does terminology matching"""
 
-from translate.search import segment
 import re
 
 # We don't want to miss certain forms of words that only change a little
@@ -36,6 +35,8 @@
     (" ", "-"), #pre order / pre-order
 ]
 
+#TODO: compile regexes
+
 class TerminologyComparer:
     def __init__(self, max_len=500):
         self.MAX_LEN = max_len
@@ -43,12 +44,10 @@
     def similarity(self, a, b, stoppercentage=40):
         """returns the match quality of term b in the text a"""
         # We could segment the words, but mostly it will give less ideal
-        # results, since we'll miss plurals, etc. We also can't search for
-        # multiword terms, such as "Free Software"
+        # results, since we'll miss plurals, etc. Then we also can't search for
+        # multiword terms, such as "Free Software". Ideally we should use a
+        # stemmer, like the Porter stemmer.
 
-        #words = segment.words(a)
-        #if b in words:
-
         # So we just see if the word occurs anywhere. This is not perfect since
         # we might get more than we bargained for. The term "form" will be found
         # in the word "format", for example. A word like "at" will trigger too

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|