[Fasttran-checkins] fasttran/fasttran tm.py,1.1,1.2
Status: Alpha
Brought to you by:
a1s
|
From: alexander s. <a1...@us...> - 2007-02-20 06:48:12
|
Update of /cvsroot/fasttran/fasttran/fasttran
In directory sc8-pr-cvs5.sourceforge.net:/tmp/cvs-serv10123

Modified Files:
    tm.py
Log Message:
use pickle protocol version 2; added TM meta information

Index: tm.py
===================================================================
RCS file: /cvsroot/fasttran/fasttran/fasttran/tm.py,v
retrieving revision 1.1
retrieving revision 1.2
diff -C2 -d -r1.1 -r1.2
*** tm.py 13 Feb 2007 05:46:55 -0000 1.1
--- tm.py 20 Feb 2007 06:48:07 -0000 1.2
***************
*** 26,29 ****
--- 26,31 ----
  """
  """History (most recent first):
+ 20-feb-2007 [als] use pickle protocol version 2;
+                   added TM meta information
  03-feb-2007 [als] created
  """
***************
*** 63,66 ****
--- 65,74 ----
      @ivar tgt: target language tag.

+     @type meta: C{dict}
+     @ivar meta: TM meta-information.
+
+         At present, this dictionary has only one entry:
+         C{version} with value of C{1}.
+
      @type delta: C{int}
      @ivar delta: maximum allowed length difference.
***************
*** 156,159 ****
--- 164,169 ----
          - we'll need this later.

+     The C{words} table also contains TM meta-information dictionary.
+
      Operations
      ----------
***************
*** 321,324 ****
--- 331,349 ----
          self._db_translations = anydbm.open(_db_prefix + ".translations", flag)
          self.load_wex("default.wex", src + ".wex")
+         _save_meta = False
+         if "" in self._db_words:
+             self.meta = loads(self._db_words[""])
+         else:
+             self.meta = {}
+             _save_meta = True
+         if "version" not in self.meta:
+             self.meta["version"] = 1
+             _save_meta = True
+         if _save_meta and (flag in "cwn"):
+             self.save_meta()
+
+     def save_meta(self):
+         """Update TM meta information in the database"""
+         self._db_words[""] = dumps(self.meta, 2)

      def store(self, sentence, translation):
***************
*** 354,358 ****
              return
          for _word in _words:
!             _key = dumps((_word, len(_words)))
              if _key in self._db_words:
                  self._db_words[_key] += "," + _id
--- 379,383 ----
              return
          for _word in _words:
!             _key = dumps((_word, len(_words)), 2)
              if _key in self._db_words:
                  self._db_words[_key] += "," + _id
***************
*** 369,373 ****
          else:
              _translations[translation] = 1
!         self._db_translations[_id] = dumps(_translations)

      def lookup(self, sentence):
--- 394,398 ----
          else:
              _translations[translation] = 1
!         self._db_translations[_id] = dumps(_translations, 2)

      def lookup(self, sentence):
***************
*** 455,459 ****
              _slen = _cnt + delta
              for _word in words:
!                 _ids = self._db_words.get(dumps((_word, _slen)), None)
                  if _ids is None:
                      _missing += 1
--- 480,484 ----
              _slen = _cnt + delta
              for _word in words:
!                 _ids = self._db_words.get(dumps((_word, _slen), 2), None)
                  if _ids is None:
                      _missing += 1
|