[Sqlalchemy-commits] [1388] sqlalchemy/branches/schema/lib/sqlalchemy: advanced datamapping doc, doi
From: <co...@sq...> - 2006-05-04 19:18:38
Revision: 1388
Author:   zzzeek
Date:     2006-05-04 14:18:24 -0500 (Thu, 04 May 2006)

Log Message:
advanced datamapping doc, doing more with polymorphic...

Modified Paths:
    sqlalchemy/branches/schema/examples/polymorph/polymorph.py
    sqlalchemy/branches/schema/lib/sqlalchemy/orm/__init__.py
    sqlalchemy/branches/schema/lib/sqlalchemy/orm/mapper.py
    sqlalchemy/branches/schema/lib/sqlalchemy/orm/session.py
    sqlalchemy/branches/schema/lib/sqlalchemy/orm/util.py
    sqlalchemy/branches/schema/lib/sqlalchemy/schema.py

Added Paths:
    sqlalchemy/branches/schema/doc/build/content/adv_datamapping.txt
    sqlalchemy/branches/schema/examples/polymorph/concrete.py
    sqlalchemy/branches/schema/examples/polymorph/single.py

Diff:

Added: sqlalchemy/branches/schema/doc/build/content/adv_datamapping.txt (1387 => 1388)
--- sqlalchemy/branches/schema/doc/build/content/adv_datamapping.txt	2006-05-04 04:38:20 UTC (rev 1387)
+++ sqlalchemy/branches/schema/doc/build/content/adv_datamapping.txt	2006-05-04 19:18:24 UTC (rev 1388)
@@ -0,0 +1,814 @@
+Advanced Data Mapping {@name=advdatamapping}
+======================
+
+This section details all the options available to Mappers, as well as advanced patterns.
+
+To start, here are the tables we will work with again:
+
+    {python}
+    from sqlalchemy import *
+
+    metadata = MetaData()
+
+    # a table to store users
+    users_table = Table('users', metadata,
+        Column('user_id', Integer, primary_key = True),
+        Column('user_name', String(40)),
+        Column('password', String(80))
+    )
+
+    # a table that stores mailing addresses associated with a specific user
+    addresses_table = Table('addresses', metadata,
+        Column('address_id', Integer, primary_key = True),
+        Column('user_id', Integer, ForeignKey("users.user_id")),
+        Column('street', String(100)),
+        Column('city', String(80)),
+        Column('state', String(2)),
+        Column('zip', String(10))
+    )
+
+    # a table that stores keywords
+    keywords_table = Table('keywords', metadata,
+        Column('keyword_id', Integer, primary_key = True),
+        Column('name', VARCHAR(50))
+    )
+
+    # a table that associates keywords with users
+    userkeywords_table = Table('userkeywords', metadata,
+        Column('user_id', INT, ForeignKey("users")),
+        Column('keyword_id', INT, ForeignKey("keywords"))
+    )
+
+### Overriding Properties {@name=overriding}
+
+A common request is the ability to create custom class properties that override the behavior of setting/getting an attribute.  Currently, the easiest way to do this in SQLAlchemy is how it would be done in any Python program; define your attribute with a different name, such as "_attribute", and use a property to get/set its value.  The mapper just needs to be told of the special name:
+
+    {python}
+    class MyClass(object):
+        def _set_email(self, email):
+            self._email = email
+        def _get_email(self):
+            return self._email
+        email = property(_get_email, _set_email)
+
+    mapper(MyClass, mytable, properties = {
+        # map the '_email' attribute to the "email" column
+        # on the table
+        '_email': mytable.c.email
+    })
+
+In a later release, SQLAlchemy will also allow _get_email and _set_email to be attached directly to the "email" property created by the mapper, and will also allow this association to occur via decorators.
+
+### More On Relations {@name=relations}
+
+#### Custom Join Conditions {@name=customjoin}
+
+When creating relations on a mapper, most examples so far have illustrated the mapper and relationship joining up based on the foreign keys of the tables they represent.
In fact, this "automatic" inspection can be completely circumvented using the `primaryjoin` and `secondaryjoin` arguments to `relation`, as in this example which creates a User object which has a relationship to all of its Addresses which are in Boston:
+
+    {python}
+    class User(object):
+        pass
+    class Address(object):
+        pass
+
+    Address.mapper = mapper(Address, addresses)
+    mapper(User, users, properties={
+        'boston_addresses' : relation(Address, primaryjoin=
+                    and_(users.c.user_id==Address.c.user_id,
+                    Address.c.city=='Boston'))
+    })
+
+Many to many relationships can be customized by one or both of `primaryjoin` and `secondaryjoin`, shown below with just the default many-to-many relationship explicitly set:
+
+    {python}
+    class User(object):
+        pass
+    class Keyword(object):
+        pass
+    Keyword.mapper = mapper(Keyword, keywords)
+    User.mapper = mapper(User, users, properties={
+        'keywords':relation(Keyword.mapper,
+            primaryjoin=users.c.user_id==userkeywords.c.user_id,
+            secondaryjoin=userkeywords.c.keyword_id==keywords.c.keyword_id
+        )
+    })
+
+#### Lazy/Eager Joins Multiple Times to One Table {@name=multiplejoin}
+
+The previous example leads into the idea of joining against the same table multiple times.  Below is a User object that has lists of its Boston and New York addresses, both lazily loaded when they are first accessed:
+
+    {python}
+    User.mapper = mapper(User, users, properties={
+        'boston_addresses' : relation(Address.mapper, primaryjoin=
+                    and_(users.c.user_id==Address.c.user_id,
+                    Address.c.city=='Boston')),
+        'newyork_addresses' : relation(Address.mapper, primaryjoin=
+                    and_(users.c.user_id==Address.c.user_id,
+                    Address.c.city=='New York')),
+    })
+
+A complication arises with the above pattern if you want the relations to be eager loaded.  Since there will be two separate joins to the addresses table during an eager load, an alias needs to be used to separate them.  You can create an alias of the addresses table to separate them, but then you are in effect creating a brand new mapper for each property, unrelated to the main Address mapper, which can create problems with commit operations.
So an additional argument `use_alias` can be used with an eager relationship to specify the alias to be used just within the eager query:
+
+    {python}
+    User.mapper = mapper(User, users, properties={
+        'boston_addresses' : relation(Address.mapper, primaryjoin=
+                    and_(User.c.user_id==Address.c.user_id,
+                    Address.c.city=='Boston'), lazy=False, use_alias=True),
+        'newyork_addresses' : relation(Address.mapper, primaryjoin=
+                    and_(User.c.user_id==Address.c.user_id,
+                    Address.c.city=='New York'), lazy=False, use_alias=True),
+    })
+
+    {sql}u = User.mapper.select()
+
+    <&|formatting.myt:codepopper, link="sql" &>
+    SELECT users.user_id AS users_user_id, users.user_name AS users_user_name,
+    users.password AS users_password,
+    addresses_EF45.address_id AS addresses_EF45_address_id, addresses_EF45.user_id AS addresses_EF45_user_id,
+    addresses_EF45.street AS addresses_EF45_street, addresses_EF45.city AS addresses_EF45_city,
+    addresses_EF45.state AS addresses_EF45_state, addresses_EF45.zip AS addresses_EF45_zip,
+    addresses_63C5.address_id AS addresses_63C5_address_id, addresses_63C5.user_id AS addresses_63C5_user_id,
+    addresses_63C5.street AS addresses_63C5_street, addresses_63C5.city AS addresses_63C5_city,
+    addresses_63C5.state AS addresses_63C5_state, addresses_63C5.zip AS addresses_63C5_zip
+    FROM users
+    LEFT OUTER JOIN addresses AS addresses_EF45 ON users.user_id = addresses_EF45.user_id
+    AND addresses_EF45.city = :addresses_city
+    LEFT OUTER JOIN addresses AS addresses_63C5 ON users.user_id = addresses_63C5.user_id
+    AND addresses_63C5.city = :addresses_city_1
+    ORDER BY users.oid, addresses_EF45.oid, addresses_63C5.oid
+    {'addresses_city_1': 'New York', 'addresses_city': 'Boston'}
+
+#### Relation Options {@name=relationoptions}
+
+Keyword options to the `relation` function include:
+
+* lazy=(True|False|None) - specifies how the related items should be loaded.  A value of True indicates they should be loaded when the property is first accessed.  A value of False indicates they should be loaded by joining against the parent object query, so parent and child are loaded in one round trip.  A value of None indicates the related items are not loaded by the mapper in any case; the application will manually insert items into the list in some other way.  A relationship with lazy=None is still important; items added to the list or removed will cause the appropriate updates and deletes upon commit().
+* secondary - for a many-to-many relationship, specifies the intermediary table.
+* primaryjoin - a ClauseElement that will be used as the primary join of this child object against the parent object, or in a many-to-many relationship the join of the primary object to the association table.  By default, this value is computed based on the foreign key relationships of the parent and child tables (or association table).
+* secondaryjoin - a ClauseElement that will be used as the join of an association table to the child object.  By default, this value is computed based on the foreign key relationships of the association and child tables.
+* foreignkey - specifies which column in this relationship is "foreign", i.e. which column refers to the parent object.  This value is automatically determined in all cases, based on the primary and secondary join conditions, except in the case of a self-referential mapper, where it is needed to indicate the child object's reference back to its parent.
+* uselist - a boolean that indicates if this property should be loaded as a list or a scalar.
In most cases, this value is determined based on the type and direction of the relationship - one to many forms a list, one to one forms a scalar, many to many is a list. If a scalar is desired where normally a list would be present, set uselist to False. +* private - indicates if these child objects are "private" to the parent; removed items will also be deleted, and if the parent item is deleted, all child objects are deleted as well. See the example in <&formatting.myt:link, path="datamapping_relations_private"&>. +* backreference - indicates the name of a property to be placed on the related mapper's class that will handle this relationship in the other direction, including synchronizing the object attributes on both sides of the relation. See the example in <&formatting.myt:link, path="datamapping_relations_backreferences"&>. +* order_by - indicates the ordering that should be applied when loading these items. See the section [advdatamapping_orderby](rel:advdatamapping_orderby) for details. +* association - When specifying a many to many relationship with an association object, this keyword should reference the mapper of the target object of the association. See the example in [datamapping_association](rel:datamapping_association). +* post_update - this indicates that the relationship should be handled by a second UPDATE statement after an INSERT, or before a DELETE. using this flag essentially means the relationship will not incur any "dependency" between parent and child item, as the particular foreign key relationship between them is handled by a second statement. use this flag when a particular mapping arrangement will incur two rows that are dependent on each other, such as a table that has a one-to-many relationship to a set of child rows, and also has a column that references a single child row within that list (i.e. both tables contain a foreign key to each other). If a flush() operation returns an error that a "cyclical dependency" was detected, this is a cue that you might want to use post_update. + +### Controlling Ordering {@name=orderby} + +By default, mappers will attempt to ORDER BY the "oid" column of a table, or the primary key column, when selecting rows. This can be modified in several ways. + +The "order_by" parameter can be sent to a mapper, overriding the per-engine ordering if any. A value of None means that the mapper should not use any ordering. 
A non-None value, which can be a column, an `asc` or `desc` clause, or an array of either one, indicates the ORDER BY clause that should be added to all select queries: + + {python} + # disable all ordering + mapper = mapper(User, users, order_by=None) + + # order by a column + mapper = mapper(User, users, order_by=users.c.user_id) + + # order by multiple items + mapper = mapper(User, users, order_by=[users.c.user_id, desc(users.c.user_name)]) + +"order_by" can also be specified to an individual `select` method, overriding all other per-engine/per-mapper orderings: + + {python} + # order by a column + l = mapper.select(users.c.user_name=='fred', order_by=users.c.user_id) + + # order by multiple criterion + l = mapper.select(users.c.user_name=='fred', order_by=[users.c.user_id, desc(users.c.user_name)]) + +For relations, the "order_by" property can also be specified to all forms of relation: + + {python} + # order address objects by address id + mapper = mapper(User, users, properties = { + 'addresses' : relation(mapper(Address, addresses), order_by=addresses.c.address_id) + }) + + # eager load with ordering - the ORDER BY clauses of parent/child will be organized properly + mapper = mapper(User, users, properties = { + 'addresses' : relation(mapper(Address, addresses), order_by=desc(addresses.c.email_address), eager=True) + }, order_by=users.c.user_id) + +### Limiting Rows {@name=limits} + +You can limit rows in a regular SQL query by specifying `limit` and `offset`. A Mapper can handle the same concepts: + + {python} + class User(object): + pass + + mapper(User, users) + {sql}r = session.query(User).select(limit=20, offset=10) + SELECT users.user_id AS users_user_id, + users.user_name AS users_user_name, users.password AS users_password + FROM users ORDER BY users.oid + LIMIT 20 OFFSET 10 + {} + +However, things get tricky when dealing with eager relationships, since a straight LIMIT of rows does not represent the count of items when joining against other tables to load related items as well. So here is what SQLAlchemy will do when you use limit or offset with an eager relationship: + + {python} + class User(object): + pass + class Address(object): + pass + mapper(User, users, properties={ + 'addresses' : relation(mapper(Address, addresses), lazy=False) + }) + r = session.query(User).select(User.c.user_name.like('F%'), limit=20, offset=10) + {opensql}SELECT users.user_id AS users_user_id, users.user_name AS users_user_name, + users.password AS users_password, addresses.address_id AS addresses_address_id, + addresses.user_id AS addresses_user_id, addresses.street AS addresses_street, + addresses.city AS addresses_city, addresses.state AS addresses_state, + addresses.zip AS addresses_zip + FROM + (SELECT users.user_id FROM users WHERE users.user_name LIKE %(users_user_name)s + ORDER BY users.oid LIMIT 20 OFFSET 10) AS rowcount, + users LEFT OUTER JOIN addresses ON users.user_id = addresses.user_id + WHERE rowcount.user_id = users.user_id ORDER BY users.oid, addresses.oid + {'users_user_name': 'F%'} + +The main WHERE clause as well as the limiting clauses are coerced into a subquery; this subquery represents the desired result of objects. A containing query, which handles the eager relationships, is joined against the subquery to produce the result. + +### Overriding Column Names {@name=colname} + +When mappers are constructed, by default the column names in the Table metadata are used as the names of attributes on the mapped class. 
This can be customzed within the properties by stating the key/column combinations explicitly: + + {python} + user_mapper = mapper(User, users, properties={ + 'id' : users.c.user_id, + 'name' : users.c.user_name, + }) + +In the situation when column names overlap in a mapper against multiple tables, columns may be referenced together with a list: + + {python} + # join users and addresses + usersaddresses = sql.join(users, addresses, users.c.user_id == addresses.c.user_id) + m = mapper(User, usersaddresses, + properties = { + 'id' : [users.c.user_id, addresses.c.user_id], + } + ) + +### Deferred Column Loading {@name=deferred} + +This feature allows particular columns of a table to not be loaded by default, instead being loaded later on when first referenced. It is essentailly "column-level lazy loading". This feature is useful when one wants to avoid loading a large text or binary field into memory when its not needed. Individual columns can be lazy loaded by themselves or placed into groups that lazy-load together. + + {python} + book_excerpts = Table('books', db, + Column('book_id', Integer, primary_key=True), + Column('title', String(200), nullable=False), + Column('summary', String(2000)), + Column('excerpt', String), + Column('photo', Binary) + ) + + class Book(object): + pass + + # define a mapper that will load each of 'excerpt' and 'photo' in + # separate, individual-row SELECT statements when each attribute + # is first referenced on the individual object instance + mapper(Book, book_excerpts, properties = { + 'excerpt' : deferred(book_excerpts.c.excerpt), + 'photo' : deferred(book_excerpts.c.photo) + }) + +Deferred columns can be placed into groups so that they load together: + + {python} + book_excerpts = Table('books', db, + Column('book_id', Integer, primary_key=True), + Column('title', String(200), nullable=False), + Column('summary', String(2000)), + Column('excerpt', String), + Column('photo1', Binary), + Column('photo2', Binary), + Column('photo3', Binary) + ) + + class Book(object): + pass + + # define a mapper with a 'photos' deferred group. when one photo is referenced, + # all three photos will be loaded in one SELECT statement. The 'excerpt' will + # be loaded separately when it is first referenced. + mapper(Book, book_excerpts, properties = { + 'excerpt' : deferred(book_excerpts.c.excerpt), + 'photo1' : deferred(book_excerpts.c.photo1, group='photos'), + 'photo2' : deferred(book_excerpts.c.photo2, group='photos'), + 'photo3' : deferred(book_excerpts.c.photo3, group='photos') + }) + +### More on Mapper Options {@name=options} + +The `options` method on the `Query` object, first introduced in [datamapping_relations_options](rel:datamapping_relations_options), produces a new `Query` object by creating a copy of the underlying `Mapper` and placing modified properties on it. The `options` method is also directly available off the `Mapper` object itself, so that the newly copied `Mapper` can be dealt with directly. The `options` method takes a variable number of `MapperOption` objects which know how to change specific things about the mapper. The five available options are `eagerload`, `lazyload`, `noload`, `deferred` and `extension`. 
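
As a quick, hypothetical usage sketch (assuming the `User`/`Address` mappers and tables set up earlier in this chapter, and an engine/session created as in the examples shipped with this revision), the non-eager options can be applied through a `Query` before the lazy-to-eager example that follows:

    {python}
    # illustration only - 'engine', the metadata and the User/Address mappers
    # are assumed to already exist as defined earlier in this chapter
    engine = create_engine('sqlite://')
    session = create_session(bind_to=engine)

    # noload: build a Query whose copied mapper never loads 'addresses'
    q = session.query(User).options(noload('addresses'))
    users = q.select()

    # lazyload: another copy that loads 'addresses' lazily again
    q2 = session.query(User).options(lazyload('addresses'))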
+ +An example of a mapper with a lazy load relationship, upgraded to an eager load relationship: + + {python} + class User(object): + pass + class Address(object): + pass + + # a 'lazy' relationship + mapper(User, users, properties = { + 'addreses':relation(mapper(Address, addresses), lazy=True) + }) + + # copy the mapper and convert 'addresses' to be eager + eagermapper = class_mapper(User).options(eagerload('addresses')) + +The `defer` and `undefer` options can control the deferred loading of attributes: + + {python} + # set the 'excerpt' deferred attribute to load normally + m = book_mapper.options(undefer('excerpt')) + + # set the referenced mapper 'photos' to defer its loading of the column 'imagedata' + m = book_mapper.options(defer('photos.imagedata')) + +### Mapping a Class with Table Inheritance {@name=inheritance} + +Inheritance in databases comes in three forms: *single table inheritance*, where several types of classes are stored in one table, *concrete table inheritance*, where each type of class is stored in its own table, and *multiple table inheritance*, where the parent/child classes are stored in their own tables that are joined together in a select. + +There is also a concept of `polymorphic` loading, which indicates if multiple kinds of classes can be loaded in one pass. + +SQLAlchemy supports all three kinds of inheritance. Additionally, true `polymorphic` loading is supported in a straightfoward way for single table inheritance, and has some more manually-configured features that can make it happen for concrete and multiple table inheritance. + +Here are the classes we will use to represent an inheritance relationship: + + {python} + class Employee(object): + def __init__(self, name): + self.name = name + def __repr__(self): + return self.__class__.__name__ + " " + self.name + + class Manager(Employee): + def __init__(self, name, manager_data): + self.name = name + self.manager_data = manager_data + def __repr__(self): + return self.__class__.__name__ + " " + self.name + " " + self.manager_data + + class Engineer(Employee): + def __init__(self, name, engineer_info): + self.name = name + self.engineer_info = engineer_info + def __repr__(self): + return self.__class__.__name__ + " " + self.name + " " + self.engineer_info + +Each class supports a common `name` attribute, while the `Manager` class has its own attribute `manager_data` and the `Engineer` class has its own attribute `engineer_info`. + +#### Single Table Inheritance + +This will support polymorphic loading via the `Employee` mapper. + + {python} + employees_table = Table('employees', metadata, + Column('employee_id', Integer, primary_key=True), + Column('name', String(50)), + Column('manager_data', String(50)), + Column('engineer_info', String(50)), + Column('type', String(20)) + ) + + employee_mapper = mapper(Employee, employees_table, polymorphic_on=employees_table.c.type) + manager_mapper = mapper(Manager, inherits=employee_mapper, polymorphic_identity='manager') + engineer_mapper = mapper(Engineer, inherits=employee_mapper, polymorphic_identity='engineer') + +#### Concrete Table Inheritance + +Without polymorphic loading, you just define a separate mapper for each class. 
+ + {python} + managers_table = Table('managers', metadata, + Column('employee_id', Integer, primary_key=True), + Column('name', String(50)), + Column('manager_data', String(50)), + ) + + engineers_table = Table('engineers', metadata, + Column('employee_id', Integer, primary_key=True), + Column('name', String(50)), + Column('engineer_info', String(50)), + ) + + manager_mapper = mapper(Manager, managers_table) + engineer_mapper = mapper(Engineer, engineers_table) + +With polymorphic loading, the SQL query to do the actual polymorphic load must be constructed, usually as a UNION. Additionally, each query composed into the UNION must have the same set of column names, as well as an extra column to indicate the "polymorphic identity" of each row: + + {python} + colnames = ['employee_id', 'name', 'manager_data', 'engineer_info'] + def col(name, table): + try: + return table.c[name] + except KeyError: + return null().label(name) + + pjoin = select( + [col(name, managers_table) for name in colnames] + [column("'manager'").label('type')] + ).union_all( + select( + [col(name, engineers_table) for name in colnames] + [column("'engineer'").label('type')] + )).alias('pjoin') + + employee_mapper = mapper(Employee, pjoin, polymorphic_on=pjoin.c.type) + manager_mapper = mapper(Manager, managers_table, inherits=employee_mapper, concrete=True, polymorphic_ident='manager') + engineer_mapper = mapper(Engineer, engineers_table, inherits=employee_mapper, concrete=True, polymorphic_ident='engineer') + + + {python} +Table Inheritance indicates the pattern where two tables, in a parent-child relationship, are mapped to an inheritance chain of classes. If a table "employees" contains additional information about managers in the table "managers", a corresponding object inheritance pattern would have an Employee class and a Manager class. Loading a Manager object means you are joining managers to employees. For SQLAlchemy, this pattern is just a special case of a mapper that maps against a joined relationship, and is provided via the `inherits` keyword. + {python} + class User(object): + """a user object.""" + pass + User.mapper = mapper(User, users) + + class AddressUser(User): + """a user object that also has the users mailing address.""" + pass + + # define a mapper for AddressUser that inherits the User.mapper, and joins on the user_id column + AddressUser.mapper = mapper( + AddressUser, + addresses, inherits=User.mapper + ) + + items = AddressUser.mapper.select() + +Above, the join condition is determined via the foreign keys between the users and the addresses table. To specify the join condition explicitly, use `inherit_condition`: +{python} + AddressUser.mapper = mapper( + AddressUser, + addresses, inherits=User.mapper, + inherit_condition=users.c.user_id==addresses.c.user_id + ) +### Mapping a Class against Multiple Tables {@name=joins} + The more general case of the pattern described in "table inheritance" is a mapper that maps against more than one table. The `join` keyword from the SQL package creates a neat selectable unit comprised of multiple tables, complete with its own composite primary key, which can be passed in to a mapper as the table. 
+ {python} + # a class + class AddressUser(object): + pass + + # define a Join + j = join(users, addresses) + + # map to it - the identity of an AddressUser object will be + # based on (user_id, address_id) since those are the primary keys involved + m = mapper(AddressUser, j) + + + A second example: + {python} + # many-to-many join on an association table + j = join(users, userkeywords, + users.c.user_id==userkeywords.c.user_id).join(keywords, + userkeywords.c.keyword_id==keywords.c.keyword_id) + + # a class + class KeywordUser(object): + pass + + # map to it - the identity of a KeywordUser object will be + # (user_id, keyword_id) since those are the primary keys involved + m = mapper(KeywordUser, j) +### Mapping a Class against Arbitary Selects {@name=selects} +Similar to mapping against a join, a plain select() object can be used with a mapper as well. Below, an example select which contains two aggregate functions and a group_by is mapped to a class: + {python} + s = select([customers, + func.count(orders).label('order_count'), + func.max(orders.price).label('highest_order')], + customers.c.customer_id==orders.c.customer_id, + group_by=[c for c in customers.c] + ) + class Customer(object): + pass + + mapper = mapper(Customer, s) + +Above, the "customers" table is joined against the "orders" table to produce a full row for each customer row, the total count of related rows in the "orders" table, and the highest price in the "orders" table, grouped against the full set of columns in the "customers" table. That query is then mapped against the Customer class. New instances of Customer will contain attributes for each column in the "customers" table as well as an "order_count" and "highest_order" attribute. Updates to the Customer object will only be reflected in the "customers" table and not the "orders" table. This is because the primary keys of the "orders" table are not represented in this mapper and therefore the table is not affected by save or delete operations. +### Multiple Mappers for One Class {@name=multiple} + By now it should be apparent that the mapper defined for a class is in no way the only mapper that exists for that class. Other mappers can be created at any time; either explicitly or via the `options` method, to provide different loading behavior. + + However, its not as simple as that. The mapper serves a dual purpose; one is to generate select statements and load objects from executing those statements; the other is to keep track of the defined dependencies of that object when save and delete operations occur, and to extend the attributes of the object so that they store information about their history and communicate with the unit of work system. For this reason, it is a good idea to be aware of the behavior of multiple mappers. When creating dependency relationships between objects, one should insure that only the primary mappers are used in those relationships, else deep object traversal operations will fail to load in the expected properties, and update operations will not take all the dependencies into account. + + Generally its as simple as, the <i>first</i> mapper that is defined for a particular class is the one that gets to define that classes' relationships to other mapped classes, and also decorates its attributes and constructors with special behavior. Any subsequent mappers created for that class will be able to load new instances, but object manipulation operations will still function via the original mapper. 
The special keyword `is_primary` will override this behavior, and make any mapper the new "primary" mapper. + + {python} + class User(object): + pass + + # mapper one - mark it as "primary", meaning this mapper will handle + # saving and class-level properties + m1 = mapper(User, users, is_primary=True) + + # mapper two - this one will also eager-load address objects in + m2 = mapper(User, users, properties={ + 'addresses' : relation(mapper(Address, addresses), lazy=False) + }) + + # get a user. this user will not have an 'addreses' property + u1 = m1.select(User.c.user_id==10) + + # get another user. this user will have an 'addreses' property. + u2 = m2.select(User.c.user_id==27) + + # make some modifications, including adding an Address object. + u1.user_name = 'jack' + u2.user_name = 'jane' + u2.addresses.append(Address('123 green street')) + + # upon commit, the User objects will be saved. + # the Address object will not, since the primary mapper for User + # does not have an 'addresses' relationship defined + objectstore.commit() +### Circular Mapping {@name=circular} +Oftentimes it is necessary for two mappers to be related to each other. With a datamodel that consists of Users that store Addresses, you might have an Address object and want to access the "user" attribute on it, or have a User object and want to get the list of Address objects. The easiest way to do this is via the `backreference` keyword described in <&formatting.myt:link, path="datamapping_relations_backreferences"&>. Although even when backreferences are used, it is sometimes necessary to explicitly specify the relations on both mappers pointing to each other. +To achieve this involves creating the first mapper by itself, then creating the second mapper referencing the first, then adding references to the first mapper to reference the second: +{python} + class User(object): + pass + class Address(object): + pass + User.mapper = mapper(User, users) + Address.mapper = mapper(Address, addresses, properties={ + 'user':relation(User.mapper) + }) + User.mapper.add_property('addresses', relation(Address.mapper)) + +Note that with a circular relationship as above, you cannot declare both relationships as "eager" relationships, since that produces a circular query situation which will generate a recursion exception. So what if you want to load an Address and its User eagerly? Just make a second mapper using options: +{python} + eagermapper = Address.mapper.options(eagerload('user')) + s = eagermapper.select(Address.c.address_id==12) +### Self Referential Mappers {@name=recursive} +A self-referential mapper is a mapper that is designed to operate with an <b>adjacency list</b> table. This is a table that contains one or more foreign keys back to itself, and is usually used to create hierarchical tree structures. SQLAlchemy's default model of saving items based on table dependencies is not sufficient in this case, as an adjacency list table introduces dependencies between individual rows. Fortunately, SQLAlchemy will automatically detect a self-referential mapper and do the extra lifting to make it work. 
+    {python}
+    # define a self-referential table
+    trees = Table('treenodes', engine,
+        Column('node_id', Integer, primary_key=True),
+        Column('parent_node_id', Integer, ForeignKey('treenodes.node_id'), nullable=True),
+        Column('node_name', String(50), nullable=False),
+        )
+
+    # treenode class
+    class TreeNode(object):
+        pass
+
+    # mapper defines "children" property, pointing back to TreeNode class,
+    # with the mapper unspecified.  it will point back to the primary
+    # mapper on the TreeNode class.
+    TreeNode.mapper = mapper(TreeNode, trees, properties={
+            'children' : relation(
+                            TreeNode,
+                            private=True
+                         ),
+            }
+        )
+
+    # or, specify the circular relationship after establishing the original mapper:
+    mymapper = mapper(TreeNode, trees)
+
+    mymapper.add_property('children', relation(
+                            mymapper,
+                            private=True
+                         ))
+
+This kind of mapper goes through a lot of extra effort when saving and deleting items, to determine the correct dependency graph of nodes within the tree.
+
+A self-referential mapper where there is more than one relationship on the table requires that all join conditions be explicitly spelled out.  Below is a self-referring table that contains a "parent_node_id" column to reference parent/child relationships, and a "root_node_id" column which points child nodes back to the ultimate root node:
+
+    {python}
+    # define a self-referential table with several relations
+    trees = Table('treenodes', engine,
+        Column('node_id', Integer, primary_key=True),
+        Column('parent_node_id', Integer, ForeignKey('treenodes.node_id'), nullable=True),
+        Column('root_node_id', Integer, ForeignKey('treenodes.node_id'), nullable=True),
+        Column('node_name', String(50), nullable=False),
+        )
+
+    # treenode class
+    class TreeNode(object):
+        pass
+
+    # define the "children" property as well as the "root" property
+    TreeNode.mapper = mapper(TreeNode, trees, properties={
+            'children' : relation(
+                            TreeNode,
+                            primaryjoin=trees.c.parent_node_id==trees.c.node_id,
+                            private=True
+                         ),
+            'root' : relation(
+                            TreeNode,
+                            primaryjoin=trees.c.root_node_id==trees.c.node_id,
+                            foreignkey=trees.c.node_id,
+                            uselist=False
+                         )
+            }
+        )
+
+The "root" property on a TreeNode is a many-to-one relationship.  By default, a self-referential mapper declares relationships as one-to-many, so the extra parameter `foreignkey`, pointing to the "many" side of a relationship, is needed to indicate a "many-to-one" self-referring relationship.
+
+Both TreeNode examples above are available in functional form in the `examples/adjacencytree` directory of the distribution.
+
+### Result-Set Mapping {@name=resultset}
+
+Take any result set and feed it into a mapper to produce objects.  Multiple mappers can be combined to retrieve unrelated objects from the same row in one step.  The `instances` method on mapper takes a ResultProxy object, which is the result type generated from SQLEngine, and delivers object instances.
+    <&|formatting.myt:code, title="single object"&>
+    class User(object):
+        pass
+
+    User.mapper = mapper(User, users)
+
+    # select users
+    c = users.select().execute()
+
+    # get objects
+    userlist = User.mapper.instances(c)
+
+
+    <&|formatting.myt:code, title="multiple objects"&>
+    # define a second class/mapper
+    class Address(object):
+        pass
+
+    Address.mapper = mapper(Address, addresses)
+
+    # select users and addresses in one query
+    s = select([users, addresses], users.c.user_id==addresses.c.user_id)
+
+    # execute it, and process the results with the User mapper, chained to the Address mapper
+    r = User.mapper.instances(s.execute(), Address.mapper)
+
+    # result rows are an array of objects, one for each mapper used
+    for entry in r:
+        user = entry[0]
+        address = entry[1]
+
+### Mapper Arguments {@name=arguments}
+
+Other arguments not covered above include:
+
+<ul>
+    <li>version_id_col=None - an integer-holding Column object that will be assigned an incrementing
+    counter, which is added to the WHERE clause used by UPDATE and DELETE statements.  The matching row
+    count returned by the database is compared to the expected row count, and an exception is raised if they don't match.  This is a basic "optimistic concurrency" check.  Without the version id column, SQLAlchemy still compares the updated rowcount.</li>
+    <li>always_refresh=False - this option will cause the mapper to refresh all the attributes of all objects loaded by select/get statements, regardless of whether they already exist in the current session.  This includes all lazy- and eager-loaded relationship attributes, and will also overwrite any changes made to attributes on the column.</li>
+    <li>entity_name=None - this is an optional "entity name" that will be appended to the key used to associate classes to this mapper.  What this basically means is, several primary mappers can be made against the same class by using different entity names; object instances will have the entity name tagged to them, so that all operations will occur on them relative to that mapper.  When instantiating new objects, use <code>_sa_entity='name'</code> to tag them to the appropriate mapper.</li>
+</ul>
+
+### Extending Mapper {@name=extending}
+
+Mappers can have functionality augmented or replaced at many points in their execution via the usage of the MapperExtension class.  This class is just a series of "hooks" where various functionality takes place.  An application can make its own MapperExtension objects, overriding only the methods it needs.
+
+    {python}
+    class MapperExtension(object):
+        def create_instance(self, mapper, row, imap, class_):
+            """called when a new object instance is about to be created from a row.
+            the method can choose to create the instance itself, or it can return
+            None to indicate normal object creation should take place.
+
+            mapper - the mapper doing the operation
+            row - the result row from the database
+            imap - a dictionary that is storing the running set of objects collected from the
+            current result set
+            class_ - the class we are mapping.
+            """
+        def append_result(self, mapper, row, imap, result, instance, isnew, populate_existing=False):
+            """called when an object instance is being appended to a result list.
+
+            If it returns True, it is assumed that this method handled the appending itself.
+ + mapper - the mapper doing the operation + row - the result row from the database + imap - a dictionary that is storing the running set of objects collected from the + current result set + result - an instance of util.HistoryArraySet(), which may be an attribute on an + object if this is a related object load (lazy or eager). use result.append_nohistory(value) + to append objects to this list. + instance - the object instance to be appended to the result + isnew - indicates if this is the first time we have seen this object instance in the current result + set. if you are selecting from a join, such as an eager load, you might see the same object instance + many times in the same result set. + populate_existing - usually False, indicates if object instances that were already in the main + identity map, i.e. were loaded by a previous select(), get their attributes overwritten + """ + def before_insert(self, mapper, instance): + """called before an object instance is INSERTed into its table. + + this is a good place to set up primary key values and such that arent handled otherwise.""" + def after_insert(self, mapper, instance): + """called after an object instance has been INSERTed""" + def before_delete(self, mapper, instance): + """called before an object instance is DELETEed""" + + + To use MapperExtension, make your own subclass of it and just send it off to a mapper: + {python} + mapper = mapper(User, users, extension=MyExtension()) + + An existing mapper can create a copy of itself using an extension via the `extension` option: + {python} + extended_mapper = mapper.options(extension(MyExtension())) +### How Mapper Modifies Mapped Classes {@name=class} +This section is a quick summary of what's going on when you send a class to the `mapper()` function. This material, not required to be able to use SQLAlchemy, is a little more dense and should be approached patiently! + +The primary changes to a class that is mapped involve attaching property objects to it which represent table columns. These property objects essentially track changes. In addition, the __init__ method of the object is decorated to track object creates. +Here is a quick rundown of all the changes in code form: + {python} + # step 1 - override __init__ to 'register_new' with the Unit of Work + oldinit = myclass.__init__ + def init(self, *args, **kwargs): + nohist = kwargs.pop('_mapper_nohistory', False) + oldinit(self, *args, **kwargs) + if not nohist: + # register_new with Unit Of Work + objectstore.uow().register_new(self) + myclass.__init__ = init + + # step 2 - set a string identifier that will + # locate the classes' primary mapper + myclass._mapper = mapper.hashkey + + # step 3 - add column accessor + myclass.c = mapper.columns + + # step 4 - attribute decorating. + # this happens mostly within the package sqlalchemy.attributes + + # this dictionary will store a series of callables + # that generate "history" containers for + # individual object attributes + myclass._class_managed_attributes = {} + + # create individual properties for each column - + # these objects know how to talk + # to the attribute package to create appropriate behavior. + # the next example examines the attributes package more closely. + myclass.column1 = SmartProperty().property('column1', uselist=False) + myclass.column2 = SmartProperty().property('column2', uselist=True) + +The attribute package is used when save operations occur to get a handle on modified values. 
In the example below, +a full round-trip attribute tracking operation is illustrated: +{python} + import sqlalchemy.attributes as attributes + + # create an attribute manager. + # the sqlalchemy.mapping package keeps one of these around as + # 'objectstore.global_attributes' + manager = attributes.AttributeManager() + + # regular old new-style class + class MyClass(object): + pass + + # register a scalar and a list attribute + manager.register_attribute(MyClass, 'column1', uselist=False) + manager.register_attribute(MyClass, 'column2', uselist=True) + + # create/modify an object + obj = MyClass() + obj.column1 = 'this is a new value' + obj.column2.append('value 1') + obj.column2.append('value 2') + + # get history objects + col1_history = manager.get_history(obj, 'column1') + col2_history = manager.get_history(obj, 'column2') + + # whats new ? + >>> col1_history.added_items() + ['this is a new value'] + + >>> col2_history.added_items() + ['value1', 'value2'] + + # commit changes + manager.commit(obj) + + # the new values become the "unchanged" values + >>> col1_history.added_items() + [] + + >>> col1_history.unchanged_items() + ['this is a new value'] + + >>> col2_history.added_items() + [] + + >>> col2_history.unchanged_items() + ['value1', 'value2'] + +The above AttributeManager also includes a method `value_changed` which is triggered whenever change events occur on the managed object attributes. The Unit of Work (objectstore) package overrides this method in order to receive change events; its essentially this: +` + import sqlalchemy.attributes as attributes + class UOWAttributeManager(attributes.AttributeManager): + def value_changed(self, obj, key, value): + if hasattr(obj, '_instance_key'): + uow().register_dirty(obj) + else: + uow().register_new(obj) + + global_attributes = UOWAttributeManager() + +Objects that contain the attribute "_instance_key" are already registered with the Identity Map, and are assumed to have come from the database. They therefore get marked as "dirty" when changes happen. Objects without an "_instance_key" are not from the database, and get marked as "new" when changes happen, although usually this will already have occured via the object's __init__ method. 
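
As a final, minimal sketch (assuming only the `AttributeManager` methods shown above - `register_attribute`, `value_changed` and `commit` - and nothing else about the internals), a custom manager can hook `value_changed` the same way the unit of work does, here simply recording the change events:

    {python}
    import sqlalchemy.attributes as attributes

    events = []

    # a toy manager that records change events instead of registering
    # objects with the unit of work (illustration only)
    class LoggingAttributeManager(attributes.AttributeManager):
        def value_changed(self, obj, key, value):
            # per the description above, this hook fires whenever a managed
            # attribute is set or a managed list is mutated
            events.append((obj, key, value))

    manager = LoggingAttributeManager()

    class Widget(object):
        pass

    # instrument a scalar and a list attribute, as shown earlier
    manager.register_attribute(Widget, 'name', uselist=False)
    manager.register_attribute(Widget, 'tags', uselist=True)

    w = Widget()
    w.name = 'wide widget'
    w.tags.append('inventory')

    # each mutation above appended one entry to 'events';
    # committing marks the current values as "unchanged"
    manager.commit(w)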
+ + </ins></span></pre></div> <a id="sqlalchemybranchesschemaexamplespolymorphconcretepy"></a> <div class="addfile"><h4>Added: sqlalchemy/branches/schema/examples/polymorph/concrete.py (1387 => 1388)</h4> <pre class="diff"><span> <span class="info">--- sqlalchemy/branches/schema/examples/polymorph/concrete.py 2006-05-04 04:38:20 UTC (rev 1387) +++ sqlalchemy/branches/schema/examples/polymorph/concrete.py 2006-05-04 19:18:24 UTC (rev 1388) </span><span class="lines">@@ -0,0 +1,65 @@ </span><ins>+from sqlalchemy import * + +metadata = MetaData() + +managers_table = Table('managers', metadata, + Column('employee_id', Integer, primary_key=True), + Column('name', String(50)), + Column('manager_data', String(40)) +) + +engineers_table = Table('engineers', metadata, + Column('employee_id', Integer, primary_key=True), + Column('name', String(50)), + Column('engineer_info', String(40)) +) + +engine = create_engine('sqlite:///', echo=True) +metadata.create_all(engine) + + +class Employee(object): + def __init__(self, name): + self.name = name + def __repr__(self): + return self.__class__.__name__ + " " + self.name + +class Manager(Employee): + def __init__(self, name, manager_data): + self.name = name + self.manager_data = manager_data + def __repr__(self): + return self.__class__.__name__ + " " + self.name + " " + self.manager_data + +class Engineer(Employee): + def __init__(self, name, engineer_info): + self.name = name + self.engineer_info = engineer_info + def __repr__(self): + return self.__class__.__name__ + " " + self.name + " " + self.engineer_info + + +pjoin = polymorphic_union({ + 'manager':managers_table, + 'engineer':engineers_table +}, 'type', 'pjoin') + +employee_mapper = mapper(Employee, pjoin, polymorphic_on=pjoin.c.type) +manager_mapper = mapper(Manager, managers_table, inherits=employee_mapper, concrete=True, polymorphic_ident='manager') +engineer_mapper = mapper(Engineer, engineers_table, inherits=employee_mapper, concrete=True, polymorphic_ident='engineer') + + +session = create_session(bind_to=engine) + +m1 = Manager("pointy haired boss", "manager1") +e1 = Engineer("wally", "engineer1") +e2 = Engineer("dilbert", "engineer2") + +session.save(m1) +session.save(e1) +session.save(e2) +session.flush() + +employees = session.query(Employee).select() +print [e for e in employees] + </ins></span></pre></div> <a id="sqlalchemybranchesschemaexamplespolymorphpolymorphpy"></a> <div class="modfile"><h4>Modified: sqlalchemy/branches/schema/examples/polymorph/polymorph.py (1387 => 1388)</h4> <pre class="diff"><span> <span class="info">--- sqlalchemy/branches/schema/examples/polymorph/polymorph.py 2006-05-04 04:38:20 UTC (rev 1387) +++ sqlalchemy/branches/schema/examples/polymorph/polymorph.py 2006-05-04 19:18:24 UTC (rev 1388) </span><span class="lines">@@ -4,7 +4,7 @@ </span><span class="cx"> # this example illustrates a polymorphic load of two classes, where each class has a very </span><span class="cx"> # different set of properties </span><span class="cx"> </span><del>-db = create_engine('sqlite://', echo=True, echo_uow=False) </del><ins>+db = create_engine('sqlite://', echo='debug', echo_uow=False) </ins><span class="cx"> </span><span class="cx"> # a table to store companies </span><span class="cx"> companies = Table('companies', db, </span><span class="lines">@@ -56,7 +56,13 @@ </span><span class="cx"> </span><span class="cx"> # create a union that represents both types of joins. we have to use </span><span class="cx"> # nulls to pad out the disparate columns. 
</span><del>-person_join = select( </del><ins>+person_join = polymorphic_union( + { + 'engineer':people.join(engineers), + 'manager':people.join(managers), + 'person':people, + }, None, 'pjoin') +p_person_join = select( </ins><span class="cx"> [ </span><span class="cx"> people, </span><span class="cx"> managers.c.status, </span><span class="lines">@@ -88,6 +94,7 @@ </span><span class="cx"> ) </span><span class="cx"> ).alias('pjoin') </span><span class="cx"> </span><ins>+ </ins><span class="cx"> person_mapper = mapper(Person, people, select_table=person_join, polymorphic_on=person_join.c.type, polymorphic_ident='person') </span><span class="cx"> mapper(Engineer, engineers, inherits=person_mapper, polymorphic_ident='engineer') </span><span class="cx"> mapper(Manager, managers, inherits=person_mapper, polymorphic_ident='manager') </span></span></pre></div> <a id="sqlalchemybranchesschemaexamplespolymorphsinglepy"></a> <div class="addfile"><h4>Added: sqlalchemy/branches/schema/examples/polymorph/single.py (1387 => 1388)</h4> <pre class="diff"><span> <span class="info">--- sqlalchemy/branches/schema/examples/polymorph/single.py 2006-05-04 04:38:20 UTC (rev 1387) +++ sqlalchemy/branches/schema/examples/polymorph/single.py 2006-05-04 19:18:24 UTC (rev 1388) </span><span class="lines">@@ -0,0 +1,25 @@ </span><ins>+from sqlalchemy import * + +metadata = MetaData() + +employees_table = Table('employees', metadata, + Column('employee_id', Integer, primary_key=True), + Column('name', String(50)), + Column('type', String(20)) +) + +engine = create_engine('sqlite:///') +metadata.create_all(engine) + +class Employee(object): + pass + +class Manager(Employee): + pass + +class Engineer(Employee): + pass + +employee_mapper = mapper(Employee, employees_table, polymorphic_on=employees_table.c.type) +manager_mapper = mapper(Manager, inherits=employee_mapper, polymorphic_identity='manager') +engineer_mapper = mapper(Engineer, inherits=employee_mapper, polymorphic_identity='engineer') </ins></span></pre></div> <a id="sqlalchemybranchesschemalibsqlalchemyorm__init__py"></a> <div class="modfile"><h4>Modified: sqlalchemy/branches/schema/lib/sqlalchemy/orm/__init__.py (1387 => 1388)</h4> <pre class="diff"><span> <span class="info">--- sqlalchemy/branches/schema/lib/sqlalchemy/orm/__init__.py 2006-05-04 04:38:20 UTC (rev 1387) +++ sqlalchemy/branches/schema/lib/sqlalchemy/orm/__init__.py 2006-05-04 19:18:24 UTC (rev 1388) </span><span class="lines">@@ -14,11 +14,12 @@ </span><span class="cx"> import sqlalchemy.util as util </span><span class="cx"> from exceptions import * </span><span class="cx"> from mapper import * </span><ins>+from util import polymorphic_union </ins><span class="cx"> from properties import * </span><span class="cx"> </span><span class="cx"> __all__ = ['relation', 'backref', 'eagerload', 'lazyload', 'noload', 'deferred', 'defer', 'undefer', </span><span class="cx"> 'mapper', 'clear_mappers', 'sql', 'extension', 'class_mapper', 'object_mapper', 'MapperExtension', </span><del>- 'cascade_mappers' </del><ins>+ 'cascade_mappers', 'polymorphic_union' </ins><span class="cx"> ] </span><span class="cx"> </span><span class="cx"> def relation(*args, **kwargs): </span></span></pre></div> <a id="sqlalchemybranchesschemalibsqlalchemyormmapperpy"></a> <div class="modfile"><h4>Modified: sqlalchemy/branches/schema/lib/sqlalchemy/orm/mapper.py (1387 => 1388)</h4> <pre class="diff"><span> <span class="info">--- sqlalchemy/branches/schema/lib/sqlalchemy/orm/mapper.py 2006-05-04 04:38:20 UTC (rev 1387) +++ 
sqlalchemy/branches/schema/lib/sqlalchemy/orm/mapper.py 2006-05-04 19:18:24 UTC (rev 1388) </span><span class="lines">@@ -54,6 +54,7 @@ </span><span class="cx"> polymorphic_on=None, </span><span class="cx"> polymorphic_map=None, </span><span class="cx"> polymorphic_ident=None, </span><ins>+ concrete=False, </ins><span class="cx"> select_table=None): </span><span class="cx"> </span><span class="cx"> ext = MapperExtension() </span><span class="lines">@@ -98,7 +99,7 @@ </span><span class="cx"> # tables - a collection of underlying Table objects pulled from mapped_table </span><span class="cx"> </span><span class="cx"> for table in (local_table, select_table): </span><del>- if table is not None and isinstance(local_table, sql.Select): </del><ins>+ if table is not None and isinstance(local_table, sql.SelectBaseMixin): </ins><span class="cx"> # some db's, noteably postgres, dont want to select from a select </span><span class="cx"> # without an alias. also if we make our own alias internally, then </span><span class="cx"> # the configured properties on the mapper are not matched against the alias </span><span class="lines">@@ -115,19 +116,23 @@ </span><span class="cx"> raise ArgumentError("Class '%s' does not inherit from '%s'" % (self.class_.__name__, inherits.class_.__name__)) </span><span class="cx"> # inherit_condition is optional. </span><span class="cx"> if not local_table is inherits.local_tab... [truncated message content] |