From: <pl...@do...> - 2007-06-29 11:35:29
|
Dear all, I've recently been working with some large datasets in RAP, persisted =20 in a MySQL database. I found when using the FindIterator class that it =20 would not scale to particularly large resultsets (e.g. =20 $dbmodel->iterFind( null, null, null ) gets proportionally quite slow =20 when you get beyond ~500 results!) After investigating the reason for =20 this, I found that the FindIterator was causing the DbModel to query =20 the database server for each iteration, advancing the offset in the =20 LIMIT clause by one each time. This has led me to write the attached MysqlFindIterator class, which =20 bypasses the RAP API, and instead queries the database directly. It =20 scales in approximately linear time, so is useful for these large =20 resultsets. At the moment I've only written it to handle MySQL =20 databases, but it should be easily modifiable for whatever DB Driver =20 you wish to use. I'd be happy for the developers to include this code or any derivative =20 work in the RAP API as bundled, if you think there's suffifcient call =20 for it :) Thanks to all, and keep up the good work! Cheers, Peter ______________________________________________________ MysqlFindIterator.class.php: <?php /* * Find Iterator operating over a MySQL Database, to operate in ~linear ti= me * Particular application in iterating over large datasets, performs faste= r * + in a more scalable manner than the existing FindIterator class. * * Author: Peter Coetzee * */ class MysqlFindIter { private $curr; private $processed; private $res; private $dbconn; private $dummy; // Pre: $dbconn is connected, and a database has been selected // Post: Prepare Iterator for operation. If $ordered =3D=3D true, // select rows ordered by subject then predicate, for ease // of processing. public function MysqlFindIter( $dbconn, $modelURI, $ordered=3Dfalse= , $sub=3Dnull, $pred=3Dnull, $obj=3Dnu= ll ) { $this->dbconn =3D $dbconn; $sql =3D "SELECT statements.* FROM statements, models " . "WHERE statements.modelID=3Dmodels.modelID AND " . "models.modelURI =3D '$modelURI'"; if ( $sub !=3D null ) $sql .=3D " AND subject =3D '$sub'"; if ( $pred !=3D null ) $sql .=3D " AND predicate =3D '$pred'"; if ( $obj !=3D null ) $sql .=3D " AND object =3D '$obj'"; if ( $ordered ) $sql .=3D " ORDER BY subject, predicate"; echo $sql; $this->res =3D mysql_query( $sql ) or die( "MySQL error: " . mysql_error ); $this->dummy =3D ModelFactory::getDefaultModel(); } // Post: Return the next statement in the Iterator public function next() { if ( $this->curr =3D=3D null && !$this->hasNext() ) return null; if ( $this->curr['object_is'] =3D=3D 'l' ) { $object =3D new Literal( $this->curr['object'] ); if ( $this->curr['l_datatype'] !=3D '' && $this->curr['l_datatype'] !=3D null ) $object->setDatatype( $this->curr['l_datatype'] )= ; } else if ( $this->curr['object_is'] =3D=3D 'b' ) { $object =3D new BlankNode( $this->dummy ); } else { $object =3D new Resource( $this->curr['object'] ); } $predicate =3D new Resource( $this->curr['predicate'] ); if ( $this->curr['subject_is'] =3D=3D 'r' ) $subject =3D new Resource( $this->curr['subject'] )= ; else $subject =3D new BlankNode( $this->dummy ); $this->curr =3D null; return new Statement( $subject, $predicate, $object ); } // Post: True iff there is another Statement in the Iterator public function hasNext() { return $this->curr =3D=3D null ? $this->curr =3D mysql_fetch_assoc( $this->res ) : t= rue; } // Post: Answer the number of Statements in the Iterator public function size() { return mysql_num_rows( $this->res ); } // Post: Free up resources used by this Iterator. Behaviour of any // subsequent calls is undefined. public function free() { mysql_free_result( $this->res ); } } ?> |