# projection - a module to remedy the lack of projection in SQLObject.
# (c) 2005 Peter Goodspeed
# This module is available under the terms of the GNU General Public License
#
# This module DOES NOT operate at an SQL level--I don't have the SQL chops
# to attempt to do that properly. Instead, it mimics SQL functionality at the
# program level. This works, but it will never be as efficient as handling
# the queries at the database level.
#
# Usage: Have your data object extend Projectable as well as SQLObject.
# Then, use the .project method.
from sqlobject import SQLObject
from inspect import ismethod
from sqlobject.sqlbuilder import Field
from random import random
from __future__ import division
from copy import copy
class Row(object):
"""A template to hold the returned columns"""
class __rowIter(object):
"""An iterator over a row"""
def __init__(self, attributes):
"""Attributes are a dictionary of the current attributes and values
of the row which generated this iterator."""
self.__ats = attributes
self.__gen = self.gen()
def gen(self):
for key in self.__ats:
yield (key, self.__ats[key])
def next(self): return self.__gen.next()
def __iter__(self): return self
def __str__(self): return self.__ats
def __init__(self, idict = {}):
for key in idict:
setattr(self, key, idict[key])
def __iter__(self):
r = {}
for a in self.keys():
r[a] = getattr(self, a)
return self.__rowIter(r)
def __str__(self): return str(dict(list(self)))
def __eq__(self, other):
return dict(list(self))==dict(list(other))
def __len__(self):
return len(self.keys())
def keys(self):
return [at for at in dir(self)
if '__' not in at
and not ismethod(getattr(self, at))]
def values(self): return [getattr(self, key) for key in self.keys()]
##from aspn.activestate.com/ASPN/Cookbook/Python/Recipe/414283
class Frozendict(dict):
def _blocked_attribute(obj):
raise AttributeError, "A frozendict cannot be modified."
_blocked_attribute = property(_blocked_attribute)
__delitem__ = __setitem__ = clear = _blocked_attribute
pop = popitem = setdefault = update = _blocked_attribute
def __new__(cls, *args, **kw):
new = dict.__new__(cls)
dict.__init__(new, *args, **kw)
return new
def __init__(self, *args, **kw):
pass
def __hash__(self):
try:
return self._cached_hash
except AttributeError:
h = self._cached_hash = hash(tuple(sorted(self.items())))
return h
def __repr__(self):
return "frozendict(%s)" % dict.__repr__(self)
class ProjectionError(Exception):
def __init__(self, s):
self.s = s
def __str__(self):
return self.__class__.__name__ + ": " + self.s
class ColumnError: pass
class ColumnNotFound(ColumnError): pass
class OptionError(ProjectionError): pass
class Projectable(SQLObject):
@classmethod
def project(cls,*args,**kw):
"""Projects particular columns from the database and applies options
Definition:
In database terms, a projection involves choosing one or many columns
from the database and returning only those rows.
To project a particular column from the database, simply specify its
name as a string. Ex: Table.project('col1','col2')
This is syntactic sugar; the previous example is exactly equivalent to
Table.project(col1=['value'], col2=['value'])
There is a magic parameter: where. This doesn't look up a column named
'where'; it instead passes its value to the SQLObject select. If you have
a column in your table named 'where', you're out of luck.
To apply options to a column, name them in a list supplied as
a keyword argument with the column name as the keyword. Multiple options
can be applied to the same column.
Ex: Table.project(col1=['value','distinct'],col2=['countdistinct'])
The above query would return, for each distinct value in col1, the value
of col1 and the number of distinct values col2 takes for that distinct
value of col1.
Valid options follow:
distinct, value, count, countdistinct, min, max, avg, sum, mode, first, random
distinct:
Filters the output by distinct values in the columns selected.
If distinct is specified for multiple columns, the results will be
grouped on distinct tuples comprised of values from every column
marked distinct.
Output column name: None
value
Includes the column's value in every row. If any column has been marked
distinct, this option may only be set on columns marked distinct.
Output column name:
count
Returns the number of rows in the result set. If any column has been
marked distinct, the count is grouped by the distinct tuples.
Output column name: count
countdistinct
Returns the number of distinct values in a particular column. If any
column has been marked distinct, the count is grouped by the distinct
tuples. Note that countdistinct on a column already marked distinct
will always return 1.
Output column name: Distinct
min
Returns the minimum value in a particular column. If any column has been
marked distinct, this is computed per tuple.
Output column name: Min
max
Returns the maximum value in a particular column. If any column has been
marked distinct, this is computed per tuple.
Output column name: Max
avg
Returns the average value in a particular column. If any column has been
marked distinct, this is computed per tuple.
Output column name: Avg
sum
Returns the sum of a particular column. If any column has been
marked distinct, this is computed per tuple.
Output column name: Sum
mode
Returns the most set of values which appear most often in a column. If
any column has been marked distinct, this is computed per tuple.
Output column name: Mode
first
Returns the first value in a particular column. If any column has been
marked distinct, this is computed per tuple. Note that unless the output
was sorted in the where clause, there is no way to gaurantee which
value is returned first.
Output column name: First
random
Returns a value chosen randomly from those in a particular column. If
any column has been marked distinct, this is computed per tuple.
Output column name: Rnd
"""
where = None
#get the columns chosen, and ensure that they're in the database
cols = {}
#start with the sugar
for a in args:
if isinstance(a,str):
if a not in cls.sqlmeta.columns.keys():
raise ColumnNotFound(str(a)+" not in the database!")
else:
cols[a] = set(['value'])
#deal with the vinegar
for k in kw:
if k.lower() != 'where':
if k not in cls.sqlmeta.columns.keys():
raise ColumnNotFound(str(k)+" not in the database!")
else:
if k in cols.keys():
for i in kw[k]: cols[k].add(i)
else:
cols[k] = set(kw[k])
else: where = kw[k]
#so now we have this, for every col and option:
#cols = {col1 : set('op1','op2'), col2 : set('op2','op3')}
#let's do a bit of sanity checking now
distinctCols = set()
for col in cols:
if 'distinct' in cols[col]:
distinctCols.add(col)
if len(distinctCols) > 0:
for col in cols:
if 'value' in cols[col] and 'distinct' not in cols[col]:
raise OptionError(), "No values allowed for non-distinct cols!"
#it's time to get our rowset
rows = list(cls.select(where))
groupings = []
#that's done. Let's generate the groupings
if len(distinctCols) == 0:
groupings.append(rows)
else:
rowMap = {}
for row in rows:
#for this row: is the set of distinct values in rowMap?
dv = {}
for dc in distinctCols: dv[dc] = getattr(row, dc)
dv = Frozendict(dv)
if dv in rowMap.keys():
#yup! Add this row to that grouping
rowMap[dv].append(row)
else:
#this is new: create a new grouping
rowMap[dv] = [row]
#we're done with the map now
groupings = rowMap.values()
#at this point, we have groupings:
# [[TableRow,TableRow,...],[TableRow,...],...]
#now we just need to process and get out
ret = []
for rowSet in groupings:
rr = Row()
for col in cols:
#generate a column view now to avoid repetition
colView = [getattr(r, col) for r in rowSet]
#now we get to go through per option and set things
for opt in cols[col]:
if opt == 'distinct' or opt == 'value':
#distinct is the only one with no return value
#and we deal with value later
pass
elif opt == 'count':
#count is special: it only happens once
if not hasattr(rr,'count'):
setattr(rr,'count',len(rowSet))
else:
name, val = {
'countdistinct': lambda c,r: (c+'Distinct',len(set(colView))),
'min': lambda c,r: (c+'Min',min(colView)),
'max': lambda c,r: (c+'Max',max(colView)),
'avg': lambda c,r: (c+'Avg',sum(colView)/len(colView)),
'sum': lambda c,r: (c+'Sum',sum(colView)),
'mode': lambda c,r: (c+'Mode',__mode(colView)),
'first': lambda c,r: (c+'First',colView[0]),
'random': lambda c,r: (c+'Rnd',colView[int(len(colView) * random())])
}[opt](col, rowSet)
setattr(rr,name,val)
# endif
# next opt
# next col
#deal with values
if len(distinctCols) == 0:
#we want to emit everything in the rowset
for row in rowSet:
for col in cols:
if 'value' in cols[col]:
rt = copy(rr)
setattr(rt,col,getattr(row,col))
ret.append(rt)
else:
#we only need to emit the values in the first row; all others
# have identical values
row = rowSet[0]
for col in cols:
if 'value' in cols[col]:
setattr(rr, col, getattr(row, col))
ret.append(rr)
return ret
def __mode(l):
"Returns a list of the mathematical mode of the input list"
freq = {}
for i in l:
if freq.has_key(i):
freq[i] += 1
else:
freq[i] = 0
return [k for k in freq if freq[k] == max(freq.values())]