Update of /cvsroot/tess/TheSpamSecretary
In directory sc8-pr-cvs1:/tmp/cvs-serv16016
Modified Files:
TheSpamSecretary.py
Log Message:
Strip HTML comments (<!-- ... -->) out of the HTML parts of multipart messages.
Index: TheSpamSecretary.py
===================================================================
RCS file: /cvsroot/tess/TheSpamSecretary/TheSpamSecretary.py,v
retrieving revision 1.11
retrieving revision 1.12
diff -C2 -d -r1.11 -r1.12
*** TheSpamSecretary.py 13 Jan 2003 00:15:49 -0000 1.11
--- TheSpamSecretary.py 1 Apr 2003 05:02:36 -0000 1.12
***************
*** 491,498 ****
--- 491,501 ----
#print("TYPE: %s" % onePart.gettype())
if (not (re.search("application", onePart.gettype()) or re.search("image", onePart.gettype()))):
+
try:
mimetools.decode(multiFile, outputData, onePart.getencoding())
except:#
self.logFile.write("Failed to decode something of type %s\n" % onePart.getencoding())
+ if (re.search("html", onePart.gettype())):
+ outputData = self.stripComments(outputData.getvalue());
#else:
# print("NO DECODE")
***************
*** 511,514 ****
--- 514,528 ----
#print("oneline: %s" % oneLine)
return(count)
+
+ ##################################################
+
+ def stripComments(self, someText):
+ """
+ Strip the comments from an html mime part. Returns a StringIO.
+ """
+ outputData = StringIO.StringIO()
+ commentRE = re.compile("<!--.*?-->", re.DOTALL | re.MULTILINE)
+ outputData.write(commentRE.sub('', someText))
+ return outputData
##################################################
|