Re: [Modeling-users] Performance Boost
Status: Abandoned
Brought to you by:
sbigaret
From: Sebastien B. <sbi...@us...> - 2003-05-27 01:16:11
|
Yannick Gingras <yan...@sa...> wrote: > After tweaking on my side to find a better implementation of DOM in > terms of performance I found that xml.dom.minidom.parseString() is > near 3 times faster than Sax2.Reader().fromStream(). > > Since Modeling uses Sax2 in ModelSet to parse the XML model you might > want to try this patch : [snipped] Woohoo, great! My measurements show that it makes loading an XML model more than 2x faster. [measured with: t0=time.time() for i in range(10): ms=ModelSet.ModelSet() ms.addModelFromXML({'file': 'xmlmodels/model_StoreEmployees.xml'}) t1=time.time() print t1-t0 ] Know what? It seems you stopped just a few seconds before the (x)path ends ;) > + _attrNames = map(lambda attrPair:attrPair[0], _attrNode.items()) > + for attributeName in _attrNames: > attrType=self.xmlAttributeType(attributeName) > set=self.xmlSetAttribute(attributeName) > value=xpath.Evaluate(attrType+'(@'+attributeName+')', --> aNode.attributes.items() is exactly what we are looking for: names and corresponding values, hence we can also get rid of most of the xpath.Evaluate() calls as well... and now I can observe a performance boost of more than 6x! If some of you could also apply the patch below and test it against your models (it passes the tests), it will be integrated into CVS for the next release. I'd like to hear about the performance gain you observe as well, if possible. Thanks a lot Yannick, nice job indeed. Now I know what an experienced XML developer can do :) -- Sébastien. 
Index: Attribute.py =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D RCS file: /cvsroot/modeling/ProjectModeling/Modeling/Attribute.py,v retrieving revision 1.12 diff -u -r1.12 Attribute.py --- Attribute.py 22 Apr 2003 09:31:56 -0000 1.12 +++ Attribute.py 27 May 2003 00:49:48 -0000 @@ -532,24 +532,20 @@ Initializes a model with the supplied xml.dom.node. =20 """ - _attrDict=3Dself.xmlAttributesDict() - _attrNode=3DaNode.attributes - attributes=3D[attr.name for attr in aNode.attributes] - + k_v=3DaNode.attributes.items() # Now we must make sure that the type is initialized BEFORE the default # value is set --> we simply make sure that this will be the first one # to be initialized try: - t=3D[a for a in attributes if a=3D=3D'type'][0] #IndexError - attributes.remove(t) - attributes=3D[t]+attributes + t=3D[a for a in k_v if a[0]=3D=3D'type'][0] #IndexError + k_v.remove(t) + k_v=3D[t]+k_v except IndexError: pass =20=20=20=20=20 - for attributeName in attributes: + for attributeName, value in k_v: # Iterate on attributes declared in node attrType=3Dself.xmlAttributeType(attributeName) set=3Dself.xmlSetAttribute(attributeName) - value=3Dxpath.Evaluate(attrType+'(@'+attributeName+')', contextNode= =3DaNode) if attrType=3D=3D'string': value=3DunicodeToStr(value, encoding) set(value) =20 Index: Entity.py =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D RCS file: /cvsroot/modeling/ProjectModeling/Modeling/Entity.py,v retrieving revision 1.11 diff -u -r1.11 Entity.py --- Entity.py 22 Apr 2003 09:31:56 -0000 1.11 +++ Entity.py 27 May 2003 00:50:14 -0000 @@ -1258,13 +1258,12 @@ raise XMLImportError, "Cannot initialize a non-empty entity"=20 if phase not in (1,2): raise 
ValueError, 'Ooops, parameter phase should be 1 or 2!' - _attrDict=3Dself.xmlAttributesDict() - _attrNode=3DaNode.attributes - for attributeName in [attr.name for attr in aNode.attributes]: + + k_v=3DaNode.attributes.items() + for attributeName, value in k_v: # Iterate on attributes which are in the xml attrType=3Dself.xmlAttributeType(attributeName) set=3Dself.xmlSetAttribute(attributeName) - value=3Dxpath.Evaluate(attrType+'(@'+attributeName+')', contextNode= =3DaNode) if attrType=3D=3D'string': value=3DunicodeToStr(value, encoding) set(value) =20=20=20=20=20=20=20 Index: Model.py =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D RCS file: /cvsroot/modeling/ProjectModeling/Modeling/Model.py,v retrieving revision 1.4 diff -u -r1.4 Model.py --- Model.py 22 Apr 2003 09:31:56 -0000 1.4 +++ Model.py 27 May 2003 00:50:14 -0000 @@ -240,12 +240,11 @@ """ if self.entities(): raise XMLImportError, "Cannot initialize a non-empty model"=20 - _attrDict=3Dself.xmlAttributesDict() - _attrNode=3DaNode.attributes - for attributeName in [attr.name for attr in aNode.attributes]: + + k_v=3DaNode.attributes.items() + for attributeName, value in k_v: attrType=3Dself.xmlAttributeType(attributeName) set=3Dself.xmlSetAttribute(attributeName) - value=3Dxpath.Evaluate(attrType+'(@'+attributeName+')', contextNode= =3DaNode) if attrType=3D=3D'string': value=3DunicodeToStr(value, encoding) set(value) =20 Index: ModelSet.py =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D RCS file: /cvsroot/modeling/ProjectModeling/Modeling/ModelSet.py,v retrieving revision 1.7 diff -u -r1.7 ModelSet.py --- ModelSet.py 7 May 2003 11:28:45 -0000 1.7 +++ ModelSet.py 27 May 2003 00:50:14 -0000 @@ -54,6 
+54,7 @@ NC=3DNotificationCenter import ClassDescription import types +from xml.dom.minidom import parseString =20 from logging import error, warn =20 @@ -220,14 +221,12 @@ #import pdb; pdb.set_trace() if xmlSource.has_key('string'): encoding=3DautoDetectXMLEncoding(xmlSource['string']) - reader=3DSax2.Reader() - xmldoc=3Dreader.fromString(xmlSource['string']) + xmldoc=3DparseString(xmlSource['string']) elif xmlSource.has_key('file'): f=3Dopen(xmlSource['file'], 'rb') encoding=3DautoDetectXMLEncoding(f.read()) f.close() - reader=3DSax2.Reader() - xmldoc=3Dreader.fromStream(xmlSource['file']) + xmldoc=3DparseString(open(xmlSource['file']).read()) else: raise AttributeError, "xmlSource parameter has no key 'string' or 'f= ile'" # Do we have only one model? _TBD: a DTD should be responsible for thi= s! Index: Relationship.py =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D RCS file: /cvsroot/modeling/ProjectModeling/Modeling/Relationship.py,v retrieving revision 1.8 diff -u -r1.8 Relationship.py --- Relationship.py 22 Apr 2003 09:31:56 -0000 1.8 +++ Relationship.py 27 May 2003 00:50:23 -0000 @@ -242,14 +242,13 @@ """ Initializes a relationship with the supplied xml.dom.node. 
""" - _attrDict=3Dself.xmlAttributesDict() - _attrNode=3DaNode.attributes - for attributeName in [attr.name for attr in aNode.attributes]: + k_v=3DaNode.attributes.items() + for attributeName, value in k_v: # Iterate on attributes declared in node attrType=3Dself.xmlAttributeType(attributeName) set=3Dself.xmlSetAttribute(attributeName) - value=3Dxpath.Evaluate(attrType+'(@'+attributeName+')', contextNode= =3DaNode) if attrType=3D=3D'string': value=3DunicodeToStr(value, encoding) + if attrType=3D=3D'number': value=3Dint(value) set(value) =20=20=20=20=20=20=20 return Index: XMLutils.py =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D RCS file: /cvsroot/modeling/ProjectModeling/Modeling/XMLutils.py,v retrieving revision 1.3 diff -u -r1.3 XMLutils.py --- XMLutils.py 14 Mar 2003 11:40:10 -0000 1.3 +++ XMLutils.py 27 May 2003 00:50:24 -0000 @@ -50,11 +50,6 @@ pass =20 try: - from xml.dom.ext.reader import Sax2 - from xml.dom.ext.reader.Sax import FromXmlStream -except: - raise 'ImportError', 'PyXML is not installed' -try: from xml import xpath except: raise 'ImportError', 'XPath is not installed' |