[Fasttran-checkins] fasttran/fasttran tm.py,1.3,1.4
Status: Alpha
Brought to you by:
a1s
From: alexander s. <a1...@us...> - 2007-04-12 10:07:54
|
Update of /cvsroot/fasttran/fasttran/fasttran In directory sc8-pr-cvs5.sourceforge.net:/tmp/cvs-serv30608 Modified Files: tm.py Log Message: MS Word table cell delimiter added to word separators and to the head/tail separating regexp Index: tm.py =================================================================== RCS file: /cvsroot/fasttran/fasttran/fasttran/tm.py,v retrieving revision 1.3 retrieving revision 1.4 diff -C2 -d -r1.3 -r1.4 *** tm.py 28 Feb 2007 09:53:40 -0000 1.3 --- tm.py 12 Apr 2007 10:07:51 -0000 1.4 *************** *** 26,29 **** --- 26,31 ---- """ """History (most recent first): + 12-apr-2007 [als] MS Word table cell delimiter added to word separators + and to the head/tail separating regexp 28-feb-2007 [als] .store: ignore the pair if source and target are the same 20-feb-2007 [als] use pickle protocol version 2; *************** *** 46,51 **** # NOTE: could be "\W", but i'm not sure if that can be trusted. # Anyway, \W would split by digits and this regexp does not ! # (dunno if that's good or bad). ! WORD_SEPARATOR = re.escape(" \t\r\n\\~`!@#$%^&*()-_=+|[]{};:'\"<>,./?") class TranslationMemory(object): --- 48,54 ---- # NOTE: could be "\W", but i'm not sure if that can be trusted. # Anyway, \W would split by digits and this regexp does not ! # (dunno if that's good or bad). <als> ! # NOTE: \x07 terminates table cell text in MS Word. <als> ! WORD_SEPARATOR = re.escape(" \x07\t\r\n\\~`!@#$%^&*()-_=+|[]{};:'\"<>,./?") class TranslationMemory(object): *************** *** 255,260 **** re_wchar = re.compile("[^" + WORD_SEPARATOR + "]", re.UNICODE) re_spaces = re.compile(" {2,}") ! re_headtail = re.compile("(?P<head>\s*)(?P<body>.*?)(?P<tail>\s*)$", ! re.UNICODE) # defaults --- 258,263 ---- re_wchar = re.compile("[^" + WORD_SEPARATOR + "]", re.UNICODE) re_spaces = re.compile(" {2,}") ! re_headtail = re.compile( ! r"(?P<head>[\s\x07]*)(?P<body>.*?)(?P<tail>[\s\x07]*)$", re.UNICODE) # defaults |