Browse Source

Fixed #3566 -- Added support for aggregation to the ORM. See the documentation for details on usage.

Many thanks to:
 * Nicolas Lara, who worked on this feature during the 2008 Google Summer of Code.
 * Alex Gaynor for his help debugging and fixing a number of issues.
 * Justin Bronn for his help integrating with contrib.gis.
 * Karen Tracey for her help with cross-platform testing.
 * Ian Kelly for his help testing and fixing Oracle support.
 * Malcolm Tredinnick for his invaluable review notes.

git-svn-id: http://code.djangoproject.com/svn/django/trunk@9742 bcc190cf-cafb-0310-a4f2-bffc1f526a37
Russell Keith-Magee 16 years ago
parent
commit
cc4e4d9aee

+ 1 - 0
AUTHORS

@@ -31,6 +31,7 @@ answer newbie questions, and generally made Django that much better:
     AgarFu <heaven@croasanaso.sytes.net>
     Dagur Páll Ammendrup <dagurp@gmail.com>
     Collin Anderson <cmawebsite@gmail.com>
+    Nicolas Lara <nicolaslara@gmail.com>
     Jeff Anderson <jefferya@programmerq.net>
     Marian Andre <django@andre.sk>
     Andreas

+ 10 - 0
django/contrib/gis/db/models/aggregates.py

@@ -0,0 +1,10 @@
+from django.db.models import Aggregate
+
+class Extent(Aggregate):
+    name = 'Extent'
+
+class MakeLine(Aggregate):
+    name = 'MakeLine'
+
+class Union(Aggregate):
+    name = 'Union'

+ 68 - 121
django/contrib/gis/db/models/query.py

@@ -3,6 +3,7 @@ from django.db import connection
 from django.db.models.query import sql, QuerySet, Q
 
 from django.contrib.gis.db.backend import SpatialBackend
+from django.contrib.gis.db.models import aggregates
 from django.contrib.gis.db.models.fields import GeometryField, PointField
 from django.contrib.gis.db.models.sql import AreaField, DistanceField, GeomField, GeoQuery, GeoWhereNode
 from django.contrib.gis.measure import Area, Distance
@@ -17,7 +18,7 @@ class GeomSQL(object):
     "Simple wrapper object for geometric SQL."
     def __init__(self, geo_sql):
         self.sql = geo_sql
-    
+
     def as_sql(self, *args, **kwargs):
         return self.sql
 
@@ -30,7 +31,7 @@ class GeoQuerySet(QuerySet):
 
     def area(self, tolerance=0.05, **kwargs):
         """
-        Returns the area of the geographic field in an `area` attribute on 
+        Returns the area of the geographic field in an `area` attribute on
         each element of this GeoQuerySet.
         """
         # Peforming setup here rather than in `_spatial_attribute` so that
@@ -75,21 +76,21 @@ class GeoQuerySet(QuerySet):
 
         Keyword Arguments:
          `spheroid`  => If the geometry field is geodetic and PostGIS is
-                        the spatial database, then the more accurate 
+                        the spatial database, then the more accurate
                         spheroid calculation will be used instead of the
                         quicker sphere calculation.
-                        
-         `tolerance` => Used only for Oracle. The tolerance is 
-                        in meters -- a default of 5 centimeters (0.05) 
+
+         `tolerance` => Used only for Oracle. The tolerance is
+                        in meters -- a default of 5 centimeters (0.05)
                         is used.
         """
         return self._distance_attribute('distance', geom, **kwargs)
 
     def envelope(self, **kwargs):
         """
-        Returns a Geometry representing the bounding box of the 
+        Returns a Geometry representing the bounding box of the
         Geometry field in an `envelope` attribute on each element of
-        the GeoQuerySet. 
+        the GeoQuerySet.
         """
         return self._geom_attribute('envelope', **kwargs)
 
@@ -98,20 +99,7 @@ class GeoQuerySet(QuerySet):
         Returns the extent (aggregate) of the features in the GeoQuerySet.  The
         extent will be returned as a 4-tuple, consisting of (xmin, ymin, xmax, ymax).
         """
-        convert_extent = None
-        if SpatialBackend.postgis:
-            def convert_extent(box, geo_field):
-                # TODO: Parsing of BOX3D, Oracle support (patches welcome!)
-                # Box text will be something like "BOX(-90.0 30.0, -85.0 40.0)"; 
-                # parsing out and returning as a 4-tuple.
-                ll, ur = box[4:-1].split(',')
-                xmin, ymin = map(float, ll.split())
-                xmax, ymax = map(float, ur.split())
-                return (xmin, ymin, xmax, ymax)
-        elif SpatialBackend.oracle:
-            def convert_extent(wkt, geo_field):
-                raise NotImplementedError
-        return self._spatial_aggregate('extent', convert_func=convert_extent, **kwargs)
+        return self._spatial_aggregate(aggregates.Extent, **kwargs)
 
     def gml(self, precision=8, version=2, **kwargs):
         """
@@ -120,7 +108,7 @@ class GeoQuerySet(QuerySet):
         """
         s = {'desc' : 'GML', 'procedure_args' : {'precision' : precision}}
         if SpatialBackend.postgis:
-            # PostGIS AsGML() aggregate function parameter order depends on the 
+            # PostGIS AsGML() aggregate function parameter order depends on the
             # version -- uggh.
             major, minor1, minor2 = SpatialBackend.version
             if major >= 1 and (minor1 > 3 or (minor1 == 3 and minor2 > 1)):
@@ -163,9 +151,7 @@ class GeoQuerySet(QuerySet):
         this GeoQuerySet and returns it.  This is a spatial aggregate
         method, and thus returns a geometry rather than a GeoQuerySet.
         """
-        kwargs['geo_field_type'] = PointField
-        kwargs['agg_field'] = GeometryField
-        return self._spatial_aggregate('make_line', **kwargs)
+        return self._spatial_aggregate(aggregates.MakeLine, geo_field_type=PointField, **kwargs)
 
     def mem_size(self, **kwargs):
         """
@@ -185,7 +171,7 @@ class GeoQuerySet(QuerySet):
 
     def num_points(self, **kwargs):
         """
-        Returns the number of points in the first linestring in the 
+        Returns the number of points in the first linestring in the
         Geometry field in a `num_points` attribute on each element of
         this GeoQuerySet; otherwise sets with None.
         """
@@ -231,7 +217,7 @@ class GeoQuerySet(QuerySet):
 
     def sym_difference(self, geom, **kwargs):
         """
-        Returns the symmetric difference of the geographic field in a 
+        Returns the symmetric difference of the geographic field in a
         `sym_difference` attribute on each element of this GeoQuerySet.
         """
         return self._geomset_attribute('sym_difference', geom, **kwargs)
@@ -265,7 +251,7 @@ class GeoQuerySet(QuerySet):
         # when there's also a transformation we need to cascade the substitutions.
         # For example, 'SDO_UTIL.TO_WKTGEOMETRY(SDO_CS.TRANSFORM( ... )'
         geo_col = self.query.custom_select.get(geo_field, field_col)
-        
+
         # Setting the key for the field's column with the custom SELECT SQL to
         # override the geometry column returned from the database.
         custom_sel = '%s(%s, %s)' % (SpatialBackend.transform, geo_col, srid)
@@ -288,11 +274,10 @@ class GeoQuerySet(QuerySet):
         None if the GeoQuerySet is empty.  The `tolerance` keyword is for
         Oracle backends only.
         """
-        kwargs['agg_field'] = GeometryField
-        return self._spatial_aggregate('unionagg', **kwargs)
+        return self._spatial_aggregate(aggregates.Union, **kwargs)
 
     ### Private API -- Abstracted DRY routines. ###
-    def _spatial_setup(self, att, aggregate=False, desc=None, field_name=None, geo_field_type=None):
+    def _spatial_setup(self, att, desc=None, field_name=None, geo_field_type=None):
         """
         Performs set up for executing the spatial function.
         """
@@ -301,86 +286,52 @@ class GeoQuerySet(QuerySet):
         if desc is None: desc = att
         if not func: raise ImproperlyConfigured('%s stored procedure not available.' % desc)
 
-        # Initializing the procedure arguments. 
+        # Initializing the procedure arguments.
         procedure_args = {'function' : func}
-        
-        # Is there a geographic field in the model to perform this 
+
+        # Is there a geographic field in the model to perform this
         # operation on?
         geo_field = self.query._geo_field(field_name)
         if not geo_field:
             raise TypeError('%s output only available on GeometryFields.' % func)
 
-        # If the `geo_field_type` keyword was used, then enforce that 
+        # If the `geo_field_type` keyword was used, then enforce that
         # type limitation.
-        if not geo_field_type is None and not isinstance(geo_field, geo_field_type): 
-            raise TypeError('"%s" stored procedures may only be called on %ss.' % (func, geo_field_type.__name__)) 
+        if not geo_field_type is None and not isinstance(geo_field, geo_field_type):
+            raise TypeError('"%s" stored procedures may only be called on %ss.' % (func, geo_field_type.__name__))
 
         # Setting the procedure args.
-        procedure_args['geo_col'] = self._geocol_select(geo_field, field_name, aggregate)
+        procedure_args['geo_col'] = self._geocol_select(geo_field, field_name)
 
         return procedure_args, geo_field
 
-    def _spatial_aggregate(self, att, field_name=None, 
-                           agg_field=None, convert_func=None, 
-                           geo_field_type=None, tolerance=0.0005):
+    def _spatial_aggregate(self, aggregate, field_name=None,
+                           geo_field_type=None, tolerance=0.05):
         """
         DRY routine for calling aggregate spatial stored procedures and
         returning their result to the caller of the function.
         """
-        # Constructing the setup keyword arguments.
-        setup_kwargs = {'aggregate' : True,
-                        'field_name' : field_name,
-                        'geo_field_type' : geo_field_type,
-                        }
-        procedure_args, geo_field = self._spatial_setup(att, **setup_kwargs)
-        
-        if SpatialBackend.oracle:
-            procedure_args['tolerance'] = tolerance
-            # Adding in selection SQL for Oracle geometry columns.
-            if agg_field is GeometryField: 
-                agg_sql = '%s' % SpatialBackend.select
-            else: 
-                agg_sql = '%s'
-            agg_sql =  agg_sql % ('%(function)s(SDOAGGRTYPE(%(geo_col)s,%(tolerance)s))' % procedure_args)
-        else:
-            agg_sql = '%(function)s(%(geo_col)s)' % procedure_args
-
-        # Wrapping our selection SQL in `GeomSQL` to bypass quoting, and
-        # specifying the type of the aggregate field.
-        self.query.select = [GeomSQL(agg_sql)]
-        self.query.select_fields = [agg_field]
-
-        try:
-            # `asql` => not overriding `sql` module.
-            asql, params = self.query.as_sql()
-        except sql.datastructures.EmptyResultSet:
-            return None   
-
-        # Getting a cursor, executing the query, and extracting the returned
-        # value from the aggregate function.
-        cursor = connection.cursor()
-        cursor.execute(asql, params)
-        result = cursor.fetchone()[0]
-        
-        # If the `agg_field` is specified as a GeometryField, then autmatically
-        # set up the conversion function.
-        if agg_field is GeometryField and not callable(convert_func):
-            if SpatialBackend.postgis:
-                def convert_geom(hex, geo_field):
-                    if hex: return SpatialBackend.Geometry(hex)
-                    else: return None
-            elif SpatialBackend.oracle:
-                def convert_geom(clob, geo_field):
-                    if clob: return SpatialBackend.Geometry(clob.read(), geo_field._srid)
-                    else: return None
-            convert_func = convert_geom
-
-        # Returning the callback function evaluated on the result culled
-        # from the executed cursor.
-        if callable(convert_func):
-            return convert_func(result, geo_field)
-        else:
-            return result
+        # Getting the field the geographic aggregate will be called on.
+        geo_field = self.query._geo_field(field_name)
+        if not geo_field:
+            raise TypeError('%s aggregate only available on GeometryFields.' % aggregate.name)
+
+        # Checking if there are any geo field type limitations on this
+        # aggregate (e.g. ST_Makeline only operates on PointFields).
+        if not geo_field_type is None and not isinstance(geo_field, geo_field_type):
+            raise TypeError('%s aggregate may only be called on %ss.' % (aggregate.name, geo_field_type.__name__))
+
+        # Getting the string expression of the field name, as this is the
+        # argument taken by `Aggregate` objects.
+        agg_col = field_name or geo_field.name
+
+        # Adding any keyword parameters for the Aggregate object. Oracle backends
+        # in particular need an additional `tolerance` parameter.
+        agg_kwargs = {}
+        if SpatialBackend.oracle: agg_kwargs['tolerance'] = tolerance
+
+        # Calling the QuerySet.aggregate, and returning only the value of the aggregate.
+        return self.aggregate(_geoagg=aggregate(agg_col, **agg_kwargs))['_geoagg']
 
     def _spatial_attribute(self, att, settings, field_name=None, model_att=None):
         """
@@ -393,7 +344,7 @@ class GeoQuerySet(QuerySet):
           SQL function to call.
 
          settings:
-          Dictonary of internal settings to customize for the spatial procedure. 
+          Dictonary of internal settings to customize for the spatial procedure.
 
         Public Keyword Arguments:
 
@@ -420,7 +371,7 @@ class GeoQuerySet(QuerySet):
             for k, v in default_args.iteritems(): settings['procedure_args'].setdefault(k, v)
         else:
             geo_field = settings['geo_field']
-            
+
         # The attribute to attach to the model.
         if not isinstance(model_att, basestring): model_att = att
 
@@ -429,7 +380,7 @@ class GeoQuerySet(QuerySet):
             # Using the field's get_db_prep_lookup() to get any needed
             # transformation SQL -- we pass in a 'dummy' `contains` lookup.
             where, params = geo_field.get_db_prep_lookup('contains', settings['procedure_args'][name])
-            # Replacing the procedure format with that of any needed 
+            # Replacing the procedure format with that of any needed
             # transformation SQL.
             old_fmt = '%%(%s)s' % name
             new_fmt = where[0] % '%%s'
@@ -438,7 +389,7 @@ class GeoQuerySet(QuerySet):
 
         # Getting the format for the stored procedure.
         fmt = '%%(function)s(%s)' % settings['procedure_fmt']
-        
+
         # If the result of this function needs to be converted.
         if settings.get('select_field', False):
             sel_fld = settings['select_field']
@@ -446,10 +397,10 @@ class GeoQuerySet(QuerySet):
                 self.query.custom_select[model_att] = SpatialBackend.select
             self.query.extra_select_fields[model_att] = sel_fld
 
-        # Finally, setting the extra selection attribute with 
+        # Finally, setting the extra selection attribute with
         # the format string expanded with the stored procedure
         # arguments.
-        return self.extra(select={model_att : fmt % settings['procedure_args']}, 
+        return self.extra(select={model_att : fmt % settings['procedure_args']},
                           select_params=settings['select_params'])
 
     def _distance_attribute(self, func, geom=None, tolerance=0.05, spheroid=False, **kwargs):
@@ -471,10 +422,10 @@ class GeoQuerySet(QuerySet):
         distance = func == 'distance'
         length = func == 'length'
         perimeter = func == 'perimeter'
-        if not (distance or length or perimeter): 
+        if not (distance or length or perimeter):
             raise ValueError('Unknown distance function: %s' % func)
 
-        # The field's get_db_prep_lookup() is used to get any 
+        # The field's get_db_prep_lookup() is used to get any
         # extra distance parameters.  Here we set up the
         # parameters that will be passed in to field's function.
         lookup_params = [geom or 'POINT (0 0)', 0]
@@ -482,12 +433,12 @@ class GeoQuerySet(QuerySet):
         # If the spheroid calculation is desired, either by the `spheroid`
         # keyword or wehn calculating the length of geodetic field, make
         # sure the 'spheroid' distance setting string is passed in so we
-        # get the correct spatial stored procedure.            
-        if spheroid or (SpatialBackend.postgis and geo_field.geodetic and length): 
-            lookup_params.append('spheroid') 
+        # get the correct spatial stored procedure.
+        if spheroid or (SpatialBackend.postgis and geo_field.geodetic and length):
+            lookup_params.append('spheroid')
         where, params = geo_field.get_db_prep_lookup('distance_lte', lookup_params)
 
-        # The `geom_args` flag is set to true if a geometry parameter was 
+        # The `geom_args` flag is set to true if a geometry parameter was
         # passed in.
         geom_args = bool(geom)
 
@@ -505,7 +456,7 @@ class GeoQuerySet(QuerySet):
                 geodetic = unit_name in geo_field.geodetic_units
             else:
                 geodetic = geo_field.geodetic
-            
+
             if distance:
                 if self.query.transformed_srid:
                     # Setting the `geom_args` flag to false because we want to handle
@@ -515,7 +466,7 @@ class GeoQuerySet(QuerySet):
                     geom_args = False
                     procedure_fmt = '%s(%%(geo_col)s, %s)' % (SpatialBackend.transform, self.query.transformed_srid)
                     if geom.srid is None or geom.srid == self.query.transformed_srid:
-                        # If the geom parameter srid is None, it is assumed the coordinates 
+                        # If the geom parameter srid is None, it is assumed the coordinates
                         # are in the transformed units.  A placeholder is used for the
                         # geometry parameter.
                         procedure_fmt += ', %%s'
@@ -529,10 +480,10 @@ class GeoQuerySet(QuerySet):
 
                 if geodetic:
                     # Spherical distance calculation is needed (because the geographic
-                    # field is geodetic). However, the PostGIS ST_distance_sphere/spheroid() 
+                    # field is geodetic). However, the PostGIS ST_distance_sphere/spheroid()
                     # procedures may only do queries from point columns to point geometries
                     # some error checking is required.
-                    if not isinstance(geo_field, PointField): 
+                    if not isinstance(geo_field, PointField):
                         raise TypeError('Spherical distance calculation only supported on PointFields.')
                     if not str(SpatialBackend.Geometry(buffer(params[0].wkb)).geom_type) == 'Point':
                         raise TypeError('Spherical distance calculation only supported with Point Geometry parameters')
@@ -553,12 +504,12 @@ class GeoQuerySet(QuerySet):
 
         # Setting up the settings for `_spatial_attribute`.
         s = {'select_field' : DistanceField(dist_att),
-             'setup' : False, 
+             'setup' : False,
              'geo_field' : geo_field,
              'procedure_args' : procedure_args,
              'procedure_fmt' : procedure_fmt,
              }
-        if geom_args: 
+        if geom_args:
             s['geom_args'] = ('geom',)
             s['procedure_args']['geom'] = geom
         elif geom:
@@ -577,12 +528,12 @@ class GeoQuerySet(QuerySet):
             s['procedure_fmt'] = '%(geo_col)s,%(tolerance)s'
             s['procedure_args'] = {'tolerance' : tolerance}
         return self._spatial_attribute(func, s, **kwargs)
-                     
+
     def _geomset_attribute(self, func, geom, tolerance=0.05, **kwargs):
         """
         DRY routine for setting up a GeoQuerySet method that attaches a
         Geometry attribute and takes a Geoemtry parameter.  This is used
-        for geometry set-like operations (e.g., intersection, difference, 
+        for geometry set-like operations (e.g., intersection, difference,
         union, sym_difference).
         """
         s = {'geom_args' : ('geom',),
@@ -595,16 +546,12 @@ class GeoQuerySet(QuerySet):
             s['procedure_args']['tolerance'] = tolerance
         return self._spatial_attribute(func, s, **kwargs)
 
-    def _geocol_select(self, geo_field, field_name, aggregate=False):
+    def _geocol_select(self, geo_field, field_name):
         """
         Helper routine for constructing the SQL to select the geographic
         column.  Takes into account if the geographic field is in a
         ForeignKey relation to the current model.
         """
-        # If this is an aggregate spatial query, the flag needs to be
-        # set on the `GeoQuery` object of this queryset.
-        if aggregate: self.query.aggregate = True
-
         opts = self.model._meta
         if not geo_field in opts.fields:
             # Is this operation going to be on a related geographic field?

+ 36 - 0
django/contrib/gis/db/models/sql/aggregates.py

@@ -0,0 +1,36 @@
+from django.db.models.sql.aggregates import *
+
+from django.contrib.gis.db.models.fields import GeometryField
+from django.contrib.gis.db.backend import SpatialBackend
+
+if SpatialBackend.oracle:
+    geo_template = '%(function)s(SDOAGGRTYPE(%(field)s,%(tolerance)s))'
+else:
+    geo_template = '%(function)s(%(field)s)'
+
+class GeoAggregate(Aggregate):
+    # Overriding the SQL template with the geographic one.
+    sql_template = geo_template
+
+    is_extent = False
+
+    def __init__(self, col, source=None, is_summary=False, **extra):
+        super(GeoAggregate, self).__init__(col, source, is_summary, **extra)
+
+        # Can't use geographic aggregates on non-geometry fields.
+        if not isinstance(self.source, GeometryField):
+            raise ValueError('Geospatial aggregates only allowed on geometry fields.')
+
+        # Making sure the SQL function is available for this spatial backend.
+        if not self.sql_function:
+            raise NotImplementedError('This aggregate functionality not implemented for your spatial backend.')
+
+class Extent(GeoAggregate):
+    is_extent = True
+    sql_function = SpatialBackend.extent
+
+class MakeLine(GeoAggregate):
+    sql_function = SpatialBackend.make_line
+
+class Union(GeoAggregate):
+    sql_function = SpatialBackend.unionagg

+ 85 - 45
django/contrib/gis/db/models/sql/query.py

@@ -5,6 +5,7 @@ from django.db.models.fields.related import ForeignKey
 
 from django.contrib.gis.db.backend import SpatialBackend
 from django.contrib.gis.db.models.fields import GeometryField
+from django.contrib.gis.db.models.sql import aggregates as gis_aggregates_module
 from django.contrib.gis.db.models.sql.where import GeoWhereNode
 from django.contrib.gis.measure import Area, Distance
 
@@ -12,12 +13,35 @@ from django.contrib.gis.measure import Area, Distance
 ALL_TERMS = sql.constants.QUERY_TERMS.copy()
 ALL_TERMS.update(SpatialBackend.gis_terms)
 
+# Conversion functions used in normalizing geographic aggregates.
+if SpatialBackend.postgis:
+    def convert_extent(box):
+        # TODO: Parsing of BOX3D, Oracle support (patches welcome!)
+        # Box text will be something like "BOX(-90.0 30.0, -85.0 40.0)";
+        # parsing out and returning as a 4-tuple.
+        ll, ur = box[4:-1].split(',')
+        xmin, ymin = map(float, ll.split())
+        xmax, ymax = map(float, ur.split())
+        return (xmin, ymin, xmax, ymax)
+
+    def convert_geom(hex, geo_field):
+        if hex: return SpatialBackend.Geometry(hex)
+        else: return None
+else:
+    def convert_extent(box):
+        raise NotImplementedError('Aggregate extent not implemented for this spatial backend.')
+
+    def convert_geom(clob, geo_field):
+        if clob: return SpatialBackend.Geometry(clob.read(), geo_field._srid)
+        else: return None
+
 class GeoQuery(sql.Query):
     """
     A single spatial SQL query.
     """
     # Overridding the valid query terms.
     query_terms = ALL_TERMS
+    aggregates_module = gis_aggregates_module
 
     #### Methods overridden from the base Query class ####
     def __init__(self, model, conn):
@@ -25,7 +49,6 @@ class GeoQuery(sql.Query):
         # The following attributes are customized for the GeoQuerySet.
         # The GeoWhereNode and SpatialBackend classes contain backend-specific
         # routines and functions.
-        self.aggregate = False
         self.custom_select = {}
         self.transformed_srid = None
         self.extra_select_fields = {}
@@ -34,7 +57,6 @@ class GeoQuery(sql.Query):
         obj = super(GeoQuery, self).clone(*args, **kwargs)
         # Customized selection dictionary and transformed srid flag have
         # to also be added to obj.
-        obj.aggregate = self.aggregate
         obj.custom_select = self.custom_select.copy()
         obj.transformed_srid = self.transformed_srid
         obj.extra_select_fields = self.extra_select_fields.copy()
@@ -50,12 +72,12 @@ class GeoQuery(sql.Query):
         (without the table names) are given unique aliases. This is needed in
         some cases to avoid ambiguitity with nested queries.
 
-        This routine is overridden from Query to handle customized selection of 
+        This routine is overridden from Query to handle customized selection of
         geometry columns.
         """
         qn = self.quote_name_unless_alias
         qn2 = self.connection.ops.quote_name
-        result = ['(%s) AS %s' % (self.get_extra_select_format(alias) % col[0], qn2(alias)) 
+        result = ['(%s) AS %s' % (self.get_extra_select_format(alias) % col[0], qn2(alias))
                   for alias, col in self.extra_select.iteritems()]
         aliases = set(self.extra_select.keys())
         if with_aliases:
@@ -67,38 +89,53 @@ class GeoQuery(sql.Query):
             for col, field in izip(self.select, self.select_fields):
                 if isinstance(col, (list, tuple)):
                     r = self.get_field_select(field, col[0])
-                    if with_aliases and col[1] in col_aliases:
-                        c_alias = 'Col%d' % len(col_aliases)
-                        result.append('%s AS %s' % (r, c_alias))
-                        aliases.add(c_alias)
-                        col_aliases.add(c_alias)
+                    if with_aliases:
+                        if col[1] in col_aliases:
+                            c_alias = 'Col%d' % len(col_aliases)
+                            result.append('%s AS %s' % (r, c_alias))
+                            aliases.add(c_alias)
+                            col_aliases.add(c_alias)
+                        else:
+                            result.append('%s AS %s' % (r, col[1]))
+                            aliases.add(r)
+                            col_aliases.add(col[1])
                     else:
                         result.append(r)
                         aliases.add(r)
                         col_aliases.add(col[1])
                 else:
                     result.append(col.as_sql(quote_func=qn))
+
                     if hasattr(col, 'alias'):
                         aliases.add(col.alias)
                         col_aliases.add(col.alias)
+
         elif self.default_cols:
             cols, new_aliases = self.get_default_columns(with_aliases,
                     col_aliases)
             result.extend(cols)
             aliases.update(new_aliases)
+
+        result.extend([
+                '%s%s' % (
+                    aggregate.as_sql(quote_func=qn),
+                    alias is not None and ' AS %s' % alias or ''
+                    )
+                for alias, aggregate in self.aggregate_select.items()
+                ])
+
         # This loop customized for GeoQuery.
-        if not self.aggregate:
-            for (table, col), field in izip(self.related_select_cols, self.related_select_fields):
-                r = self.get_field_select(field, table)
-                if with_aliases and col in col_aliases:
-                    c_alias = 'Col%d' % len(col_aliases)
-                    result.append('%s AS %s' % (r, c_alias))
-                    aliases.add(c_alias)
-                    col_aliases.add(c_alias)
-                else:
-                    result.append(r)
-                    aliases.add(r)
-                    col_aliases.add(col)
+        for (table, col), field in izip(self.related_select_cols, self.related_select_fields):
+            r = self.get_field_select(field, table)
+            if with_aliases and col in col_aliases:
+                c_alias = 'Col%d' % len(col_aliases)
+                result.append('%s AS %s' % (r, c_alias))
+                aliases.add(c_alias)
+                col_aliases.add(c_alias)
+            else:
+                result.append(r)
+                aliases.add(r)
+                col_aliases.add(col)
 
         self._select_aliases = aliases
         return result
@@ -112,7 +149,7 @@ class GeoQuery(sql.Query):
         Returns a list of strings, quoted appropriately for use in SQL
         directly, as well as a set of aliases used in the select statement.
 
-        This routine is overridden from Query to handle customized selection of 
+        This routine is overridden from Query to handle customized selection of
         geometry columns.
         """
         result = []
@@ -154,20 +191,10 @@ class GeoQuery(sql.Query):
             return result, None
         return result, aliases
 
-    def get_ordering(self):
-        """
-        This routine is overridden to disable ordering for aggregate
-        spatial queries.
-        """
-        if not self.aggregate:
-            return super(GeoQuery, self).get_ordering()
-        else:
-            return ()
-
     def resolve_columns(self, row, fields=()):
         """
         This routine is necessary so that distances and geometries returned
-        from extra selection SQL get resolved appropriately into Python 
+        from extra selection SQL get resolved appropriately into Python
         objects.
         """
         values = []
@@ -183,7 +210,7 @@ class GeoQuery(sql.Query):
 
         # Converting any extra selection values (e.g., geometries and
         # distance objects added by GeoQuerySet methods).
-        values = [self.convert_values(v, self.extra_select_fields.get(a, None)) 
+        values = [self.convert_values(v, self.extra_select_fields.get(a, None))
                   for v, a in izip(row[rn_offset:index_start], aliases)]
         if SpatialBackend.oracle:
             # This is what happens normally in OracleQuery's `resolve_columns`.
@@ -212,6 +239,19 @@ class GeoQuery(sql.Query):
             value = SpatialBackend.Geometry(value)
         return value
 
+    def resolve_aggregate(self, value, aggregate):
+        """
+        Overridden from GeoQuery's normalize to handle the conversion of
+        GeoAggregate objects.
+        """
+        if isinstance(aggregate, self.aggregates_module.GeoAggregate):
+            if aggregate.is_extent:
+                return convert_extent(value)
+            else:
+                return convert_geom(value, aggregate.source)
+        else:
+            return super(GeoQuery, self).resolve_aggregate(value, aggregate)
+
     #### Routines unique to GeoQuery ####
     def get_extra_select_format(self, alias):
         sel_fmt = '%s'
@@ -222,9 +262,9 @@ class GeoQuery(sql.Query):
     def get_field_select(self, fld, alias=None):
         """
         Returns the SELECT SQL string for the given field.  Figures out
-        if any custom selection SQL is needed for the column  The `alias` 
-        keyword may be used to manually specify the database table where 
-        the column exists, if not in the model associated with this 
+        if any custom selection SQL is needed for the column.  The `alias`
+        keyword may be used to manually specify the database table where
+        the column exists, if not in the model associated with this
         `GeoQuery`.
         """
         sel_fmt = self.get_select_format(fld)
@@ -263,15 +303,15 @@ class GeoQuery(sql.Query):
         """
         Recursive utility routine for checking the given name parameter
         on the given model.  Initially, the name parameter is a string,
-        of the field on the given model e.g., 'point', 'the_geom'. 
-        Related model field strings like 'address__point', may also be 
+        of the field on the given model e.g., 'point', 'the_geom'.
+        Related model field strings like 'address__point', may also be
         used.
 
-        If a GeometryField exists according to the given name parameter 
+        If a GeometryField exists according to the given name parameter
         it will be returned, otherwise returns False.
         """
         if isinstance(name_param, basestring):
-            # This takes into account the situation where the name is a 
+            # This takes into account the situation where the name is a
             # lookup to a related geographic field, e.g., 'address__point'.
             name_param = name_param.split(sql.constants.LOOKUP_SEP)
             name_param.reverse() # Reversing so list operates like a queue of related lookups.
@@ -284,7 +324,7 @@ class GeoQuery(sql.Query):
         except (FieldDoesNotExist, IndexError):
             return False
         # TODO: ManyToManyField?
-        if isinstance(fld, GeometryField): 
+        if isinstance(fld, GeometryField):
             return fld # A-OK.
         elif isinstance(fld, ForeignKey):
             # ForeignKey encountered, return the output of this utility called
@@ -297,12 +337,12 @@ class GeoQuery(sql.Query):
         """
         Helper function that returns the database column for the given field.
         The table and column are returned (quoted) in the proper format, e.g.,
-        `"geoapp_city"."point"`.  If `table_alias` is not specified, the 
+        `"geoapp_city"."point"`.  If `table_alias` is not specified, the
         database table associated with the model of this `GeoQuery` will be
         used.
         """
         if table_alias is None: table_alias = self.model._meta.db_table
-        return "%s.%s" % (self.quote_name_unless_alias(table_alias), 
+        return "%s.%s" % (self.quote_name_unless_alias(table_alias),
                           self.connection.ops.quote_name(field.column))
 
     def _geo_field(self, field_name=None):
@@ -333,5 +373,5 @@ class DistanceField(object):
 
 # Rather than use GeometryField (which requires a SQL query
 # upon instantiation), use this lighter weight class.
-class GeomField(object): 
+class GeomField(object):
     pass

+ 23 - 0
django/db/backends/__init__.py

@@ -10,6 +10,12 @@ except NameError:
     # Python 2.3 compat
     from sets import Set as set
 
+try:
+    import decimal
+except ImportError:
+    # Python 2.3 fallback
+    from django.utils import _decimal as decimal
+
 from django.db.backends import util
 from django.utils import datetime_safe
 
@@ -62,6 +68,7 @@ class BaseDatabaseWrapper(local):
         return util.CursorDebugWrapper(cursor, self)
 
 class BaseDatabaseFeatures(object):
+    allows_group_by_pk = False
     # True if django.db.backend.utils.typecast_timestamp is used on values
     # returned from dates() calls.
     needs_datetime_string_cast = True
@@ -376,6 +383,22 @@ class BaseDatabaseOperations(object):
         """
         return self.year_lookup_bounds(value)
 
+    def convert_values(self, value, field):
+        """Coerce the value returned by the database backend into a consistent type that
+        is compatible with the field type.
+        """
+        internal_type = field.get_internal_type()
+        if internal_type == 'DecimalField':
+            return value
+        elif internal_type and internal_type.endswith('IntegerField') or internal_type == 'AutoField':
+            return int(value)
+        elif internal_type in ('DateField', 'DateTimeField', 'TimeField'):
+            return value
+        # No field, or the field isn't known to be a decimal or integer
+        # Default to a float
+        return float(value)
+
+
 class BaseDatabaseIntrospection(object):
     """
     This class encapsulates all backend-specific introspection utilities

+ 1 - 0
django/db/backends/mysql/base.py

@@ -110,6 +110,7 @@ class CursorWrapper(object):
 class DatabaseFeatures(BaseDatabaseFeatures):
     empty_fetchmany_value = ()
     update_can_self_select = False
+    allows_group_by_pk = True
     related_fields_match_type = True
 
 class DatabaseOperations(BaseDatabaseOperations):

+ 9 - 11
django/db/backends/oracle/query.py

@@ -53,21 +53,23 @@ def query_class(QueryClass, Database):
             return values
 
         def convert_values(self, value, field):
-            from django.db.models.fields import DateField, DateTimeField, \
-                 TimeField, BooleanField, NullBooleanField, DecimalField, Field
+            from django.db.models.fields import Field
             if isinstance(value, Database.LOB):
                 value = value.read()
             # Oracle stores empty strings as null. We need to undo this in
             # order to adhere to the Django convention of using the empty
             # string instead of null, but only if the field accepts the
             # empty string.
-            if value is None and isinstance(field, Field) and field.empty_strings_allowed:
+            if value is None and field and field.empty_strings_allowed:
                 value = u''
             # Convert 1 or 0 to True or False
-            elif value in (1, 0) and isinstance(field, (BooleanField, NullBooleanField)):
+            elif value in (1, 0) and field and field.get_internal_type() in ('BooleanField', 'NullBooleanField'):
                 value = bool(value)
+            # Force floats to the correct type
+            elif value is not None and field and field.get_internal_type() == 'FloatField':
+                value = float(value)
             # Convert floats to decimals
-            elif value is not None and isinstance(field, DecimalField):
+            elif value is not None and field and field.get_internal_type() == 'DecimalField':
                 value = util.typecast_decimal(field.format_number(value))
             # cx_Oracle always returns datetime.datetime objects for
             # DATE and TIMESTAMP columns, but Django wants to see a
@@ -86,13 +88,9 @@ def query_class(QueryClass, Database):
                     value = datetime.datetime(value.year, value.month,
                             value.day, value.hour, value.minute, value.second,
                             value.fsecond)
-                if isinstance(field, DateTimeField):
-                    # DateTimeField subclasses DateField so must be checked
-                    # first.
-                    pass
-                elif isinstance(field, DateField):
+                if field and field.get_internal_type() == 'DateField':
                     value = value.date()
-                elif isinstance(field, TimeField) or (value.year == 1900 and value.month == value.day == 1):
+                elif field and field.get_internal_type() == 'TimeField' or (value.year == 1900 and value.month == value.day == 1):
                     value = value.time()
                 elif value.hour == value.minute == value.second == value.microsecond == 0:
                     value = value.date()

+ 21 - 1
django/db/backends/sqlite3/base.py

@@ -10,7 +10,7 @@ from django.db.backends import *
 from django.db.backends.sqlite3.client import DatabaseClient
 from django.db.backends.sqlite3.creation import DatabaseCreation
 from django.db.backends.sqlite3.introspection import DatabaseIntrospection
-from django.utils.safestring import SafeString                                                           
+from django.utils.safestring import SafeString
 
 try:
     try:
@@ -102,6 +102,26 @@ class DatabaseOperations(BaseDatabaseOperations):
         second = '%s-12-31 23:59:59.999999'
         return [first % value, second % value]
 
+    def convert_values(self, value, field):
+        """SQLite returns floats when it should be returning decimals,
+        and gets dates and datetimes wrong.
+        For consistency with other backends, coerce when required.
+        """
+        internal_type = field.get_internal_type()
+        if internal_type == 'DecimalField':
+            return util.typecast_decimal(field.format_number(value))
+        elif internal_type and internal_type.endswith('IntegerField') or internal_type == 'AutoField':
+            return int(value)
+        elif internal_type == 'DateField':
+            return util.typecast_date(value)
+        elif internal_type == 'DateTimeField':
+            return util.typecast_timestamp(value)
+        elif internal_type == 'TimeField':
+            return util.typecast_time(value)
+
+        # No field, or the field isn't known to be a decimal or integer
+        return value
+
 class DatabaseWrapper(BaseDatabaseWrapper):
 
     # SQLite requires LIKE statements to include an ESCAPE clause if the value

+ 1 - 0
django/db/models/__init__.py

@@ -5,6 +5,7 @@ from django.db.models.loading import get_apps, get_app, get_models, get_model, r
 from django.db.models.query import Q
 from django.db.models.manager import Manager
 from django.db.models.base import Model
+from django.db.models.aggregates import *
 from django.db.models.fields import *
 from django.db.models.fields.subclassing import SubfieldBase
 from django.db.models.fields.files import FileField, ImageField

+ 66 - 0
django/db/models/aggregates.py

@@ -0,0 +1,66 @@
+"""
+Classes to represent the definitions of aggregate functions.
+"""
+
+class Aggregate(object):
+    """
+    Default Aggregate definition.
+    """
+    def __init__(self, lookup, **extra):
+        """Instantiate a new aggregate.
+
+         * lookup is the field on which the aggregate operates.
+         * extra is a dictionary of additional data to provide for the
+           aggregate definition
+
+        Also utilizes the class variables:
+         * name, the identifier for this aggregate function.
+        """
+        self.lookup = lookup
+        self.extra = extra
+
+    def _default_alias(self):
+        return '%s__%s' % (self.lookup, self.name.lower())
+    default_alias = property(_default_alias)
+
+    def add_to_query(self, query, alias, col, source, is_summary):
+        """Add the aggregate to the nominated query.
+
+        This method is used to convert the generic Aggregate definition into a
+        backend-specific definition.
+
+         * query is the backend-specific query instance to which the aggregate
+           is to be added.
+         * col is a column reference describing the subject field
+           of the aggregate. It can be an alias, or a tuple describing
+           a table and column name.
+         * source is the underlying field or aggregate definition for
+           the column reference. If the aggregate is not an ordinal or
+           computed type, this reference is used to determine the coerced
+           output type of the aggregate.
+         * is_summary is a boolean that is set True if the aggregate is a
+           summary value rather than an annotation.
+        """
+        aggregate = getattr(query.aggregates_module, self.name)
+        query.aggregate_select[alias] = aggregate(col, source=source, is_summary=is_summary, **self.extra)
+
+class Avg(Aggregate):
+    name = 'Avg'
+
+class Count(Aggregate):
+    name = 'Count'
+
+class Max(Aggregate):
+    name = 'Max'
+
+class Min(Aggregate):
+    name = 'Min'
+
+class StdDev(Aggregate):
+    name = 'StdDev'
+
+class Sum(Aggregate):
+    name = 'Sum'
+
+class Variance(Aggregate):
+    name = 'Variance'

+ 6 - 0
django/db/models/manager.py

@@ -101,6 +101,12 @@ class Manager(object):
     def filter(self, *args, **kwargs):
         return self.get_query_set().filter(*args, **kwargs)
 
+    def aggregate(self, *args, **kwargs):
+        return self.get_query_set().aggregate(*args, **kwargs)
+
+    def annotate(self, *args, **kwargs):
+        return self.get_query_set().annotate(*args, **kwargs)
+
     def complex_filter(self, *args, **kwargs):
         return self.get_query_set().complex_filter(*args, **kwargs)
 

+ 82 - 7
django/db/models/query.py

@@ -4,6 +4,7 @@ except NameError:
     from sets import Set as set     # Python 2.3 fallback
 
 from django.db import connection, transaction, IntegrityError
+from django.db.models.aggregates import Aggregate
 from django.db.models.fields import DateField
 from django.db.models.query_utils import Q, select_related_descend
 from django.db.models import signals, sql
@@ -270,18 +271,47 @@ class QuerySet(object):
         else:
             requested = None
         max_depth = self.query.max_depth
+
         extra_select = self.query.extra_select.keys()
+        aggregate_select = self.query.aggregate_select.keys()
+
         index_start = len(extra_select)
+        aggregate_start = index_start + len(self.model._meta.fields)
+
         for row in self.query.results_iter():
             if fill_cache:
-                obj, _ = get_cached_row(self.model, row, index_start,
-                        max_depth, requested=requested)
+                obj, aggregate_start = get_cached_row(self.model, row,
+                                    index_start, max_depth, requested=requested)
             else:
-                obj = self.model(*row[index_start:])
+                # omit aggregates in object creation
+                obj = self.model(*row[index_start:aggregate_start])
+
             for i, k in enumerate(extra_select):
                 setattr(obj, k, row[i])
+
+            # Add the aggregates to the model
+            for i, aggregate in enumerate(aggregate_select):
+                setattr(obj, aggregate, row[i+aggregate_start])
+
             yield obj
 
+    def aggregate(self, *args, **kwargs):
+        """
+        Returns a dictionary containing the calculations (aggregation)
+        over the current queryset.
+
+        If args is present the expression is passed as a kwarg using
+        the Aggregate object's default alias.
+        """
+        for arg in args:
+            kwargs[arg.default_alias] = arg
+
+        for (alias, aggregate_expr) in kwargs.items():
+            self.query.add_aggregate(aggregate_expr, self.model, alias,
+                is_summary=True)
+
+        return self.query.get_aggregation()
+
     def count(self):
         """
         Performs a SELECT COUNT() and returns the number of records as an
@@ -553,6 +583,25 @@ class QuerySet(object):
         """
         self.query.select_related = other.query.select_related
 
+    def annotate(self, *args, **kwargs):
+        """
+        Return a query set in which the returned objects have been annotated
+        with data aggregated from related fields.
+        """
+        for arg in args:
+            kwargs[arg.default_alias] = arg
+
+        obj = self._clone()
+
+        obj._setup_aggregate_query()
+
+        # Add the aggregates to the query
+        for (alias, aggregate_expr) in kwargs.items():
+            obj.query.add_aggregate(aggregate_expr, self.model, alias,
+                is_summary=False)
+
+        return obj
+
     def order_by(self, *field_names):
         """
         Returns a new QuerySet instance with the ordering changed.
@@ -641,6 +690,16 @@ class QuerySet(object):
         """
         pass
 
+    def _setup_aggregate_query(self):
+        """
+        Prepare the query for computing a result that contains aggregate annotations.
+        """
+        opts = self.model._meta
+        if not self.query.group_by:
+            field_names = [f.attname for f in opts.fields]
+            self.query.add_fields(field_names, False)
+            self.query.set_group_by()
+
     def as_sql(self):
         """
         Returns the internal query's SQL and parameters (as a tuple).
@@ -669,6 +728,8 @@ class ValuesQuerySet(QuerySet):
             len(self.field_names) != len(self.model._meta.fields)):
             self.query.trim_extra_select(self.extra_names)
         names = self.query.extra_select.keys() + self.field_names
+        names.extend(self.query.aggregate_select.keys())
+
         for row in self.query.results_iter():
             yield dict(zip(names, row))
 
@@ -682,20 +743,25 @@ class ValuesQuerySet(QuerySet):
         """
         self.query.clear_select_fields()
         self.extra_names = []
+        self.aggregate_names = []
+
         if self._fields:
-            if not self.query.extra_select:
+            if not self.query.extra_select and not self.query.aggregate_select:
                 field_names = list(self._fields)
             else:
                 field_names = []
                 for f in self._fields:
                     if self.query.extra_select.has_key(f):
                         self.extra_names.append(f)
+                    elif self.query.aggregate_select.has_key(f):
+                        self.aggregate_names.append(f)
                     else:
                         field_names.append(f)
         else:
             # Default to all fields.
             field_names = [f.attname for f in self.model._meta.fields]
 
+        self.query.select = []
         self.query.add_fields(field_names, False)
         self.query.default_cols = False
         self.field_names = field_names
@@ -711,6 +777,7 @@ class ValuesQuerySet(QuerySet):
             c._fields = self._fields[:]
         c.field_names = self.field_names
         c.extra_names = self.extra_names
+        c.aggregate_names = self.aggregate_names
         if setup and hasattr(c, '_setup_query'):
             c._setup_query()
         return c
@@ -718,10 +785,18 @@ class ValuesQuerySet(QuerySet):
     def _merge_sanity_check(self, other):
         super(ValuesQuerySet, self)._merge_sanity_check(other)
         if (set(self.extra_names) != set(other.extra_names) or
-                set(self.field_names) != set(other.field_names)):
+                set(self.field_names) != set(other.field_names) or
+                self.aggregate_names != other.aggregate_names):
             raise TypeError("Merging '%s' classes must involve the same values in each case."
                     % self.__class__.__name__)
 
+    def _setup_aggregate_query(self):
+        """
+        Prepare the query for computing a result that contains aggregate annotations.
+        """
+        self.query.set_group_by()
+
+        super(ValuesQuerySet, self)._setup_aggregate_query()
 
 class ValuesListQuerySet(ValuesQuerySet):
     def iterator(self):
@@ -729,14 +804,14 @@ class ValuesListQuerySet(ValuesQuerySet):
         if self.flat and len(self._fields) == 1:
             for row in self.query.results_iter():
                 yield row[0]
-        elif not self.query.extra_select:
+        elif not self.query.extra_select and not self.query.aggregate_select:
             for row in self.query.results_iter():
                 yield tuple(row)
         else:
             # When extra(select=...) is involved, the extra cols come are
             # always at the start of the row, so we need to reorder the fields
             # to match the order in self._fields.
-            names = self.query.extra_select.keys() + self.field_names
+            names = self.query.extra_select.keys() + self.field_names + self.query.aggregate_select.keys()
             for row in self.query.results_iter():
                 data = dict(zip(names, row))
                 yield tuple([data[f] for f in self._fields])

+ 0 - 1
django/db/models/query_utils.py

@@ -64,4 +64,3 @@ def select_related_descend(field, restricted, requested):
     if not restricted and field.null:
         return False
     return True
-

+ 130 - 0
django/db/models/sql/aggregates.py

@@ -0,0 +1,130 @@
+"""
+Classes to represent the default SQL aggregate functions
+"""
+
+class AggregateField(object):
+    """An internal field mockup used to identify aggregates in the
+    data-conversion parts of the database backend.
+    """
+    def __init__(self, internal_type):
+        self.internal_type = internal_type
+    def get_internal_type(self):
+        return self.internal_type
+
+ordinal_aggregate_field = AggregateField('IntegerField')
+computed_aggregate_field = AggregateField('FloatField')
+
+class Aggregate(object):
+    """
+    Default SQL Aggregate.
+    """
+    is_ordinal = False
+    is_computed = False
+    sql_template = '%(function)s(%(field)s)'
+
+    def __init__(self, col, source=None, is_summary=False, **extra):
+        """Instantiate an SQL aggregate
+
+         * col is a column reference describing the subject field
+           of the aggregate. It can be an alias, or a tuple describing
+           a table and column name.
+         * source is the underlying field or aggregate definition for
+           the column reference. If the aggregate is not an ordinal or
+           computed type, this reference is used to determine the coerced
+           output type of the aggregate.
+         * extra is a dictionary of additional data to provide for the
+           aggregate definition
+
+        Also utilizes the class variables:
+         * sql_function, the name of the SQL function that implements the
+           aggregate.
+         * sql_template, a template string that is used to render the
+           aggregate into SQL.
+         * is_ordinal, a boolean indicating if the output of this aggregate
+           is an integer (e.g., a count)
+         * is_computed, a boolean indicating if the output of this aggregate
+           is a computed float (e.g., an average), regardless of the input
+           type.
+
+        """
+        self.col = col
+        self.source = source
+        self.is_summary = is_summary
+        self.extra = extra
+
+        # Follow the chain of aggregate sources back until you find an
+        # actual field, or an aggregate that forces a particular output
+        # type. This type of this field will be used to coerce values
+        # retrieved from the database.
+        tmp = self
+
+        while tmp and isinstance(tmp, Aggregate):
+            if getattr(tmp, 'is_ordinal', False):
+                tmp = ordinal_aggregate_field
+            elif getattr(tmp, 'is_computed', False):
+                tmp = computed_aggregate_field
+            else:
+                tmp = tmp.source
+
+        self.field = tmp
+
+    def relabel_aliases(self, change_map):
+        if isinstance(self.col, (list, tuple)):
+            self.col = (change_map.get(self.col[0], self.col[0]), self.col[1])
+
+    def as_sql(self, quote_func=None):
+        "Return the aggregate, rendered as SQL."
+        if not quote_func:
+            quote_func = lambda x: x
+
+        if hasattr(self.col, 'as_sql'):
+            field_name = self.col.as_sql(quote_func)
+        elif isinstance(self.col, (list, tuple)):
+            field_name = '.'.join([quote_func(c) for c in self.col])
+        else:
+            field_name = self.col
+
+        params = {
+            'function': self.sql_function,
+            'field': field_name
+        }
+        params.update(self.extra)
+
+        return self.sql_template % params
+
+
+class Avg(Aggregate):
+    is_computed = True
+    sql_function = 'AVG'
+
+class Count(Aggregate):
+    is_ordinal = True
+    sql_function = 'COUNT'
+    sql_template = '%(function)s(%(distinct)s%(field)s)'
+
+    def __init__(self, col, distinct=False, **extra):
+        super(Count, self).__init__(col, distinct=distinct and 'DISTINCT ' or '', **extra)
+
+class Max(Aggregate):
+    sql_function = 'MAX'
+
+class Min(Aggregate):
+    sql_function = 'MIN'
+
+class StdDev(Aggregate):
+    is_computed = True
+
+    def __init__(self, col, sample=False, **extra):
+        super(StdDev, self).__init__(col, **extra)
+        self.sql_function = sample and 'STDDEV_SAMP' or 'STDDEV_POP'
+
+class Sum(Aggregate):
+    sql_function = 'SUM'
+
+class Variance(Aggregate):
+    is_computed = True
+
+    def __init__(self, col, sample=False, **extra):
+        super(Variance, self).__init__(col, **extra)
+        self.sql_function = sample and 'VAR_SAMP' or 'VAR_POP'
+

+ 0 - 53
django/db/models/sql/datastructures.py

@@ -25,59 +25,6 @@ class RawValue(object):
     def __init__(self, value):
         self.value = value
 
-class Aggregate(object):
-    """
-    Base class for all aggregate-related classes (min, max, avg, count, sum).
-    """
-    def relabel_aliases(self, change_map):
-        """
-        Relabel the column alias, if necessary. Must be implemented by
-        subclasses.
-        """
-        raise NotImplementedError
-
-    def as_sql(self, quote_func=None):
-        """
-        Returns the SQL string fragment for this object.
-
-        The quote_func function is used to quote the column components. If
-        None, it defaults to doing nothing.
-
-        Must be implemented by subclasses.
-        """
-        raise NotImplementedError
-
-class Count(Aggregate):
-    """
-    Perform a count on the given column.
-    """
-    def __init__(self, col='*', distinct=False):
-        """
-        Set the column to count on (defaults to '*') and set whether the count
-        should be distinct or not.
-        """
-        self.col = col
-        self.distinct = distinct
-
-    def relabel_aliases(self, change_map):
-        c = self.col
-        if isinstance(c, (list, tuple)):
-            self.col = (change_map.get(c[0], c[0]), c[1])
-
-    def as_sql(self, quote_func=None):
-        if not quote_func:
-            quote_func = lambda x: x
-        if isinstance(self.col, (list, tuple)):
-            col = ('%s.%s' % tuple([quote_func(c) for c in self.col]))
-        elif hasattr(self.col, 'as_sql'):
-            col = self.col.as_sql(quote_func)
-        else:
-            col = self.col
-        if self.distinct:
-            return 'COUNT(DISTINCT %s)' % col
-        else:
-            return 'COUNT(%s)' % col
-
 class Date(object):
     """
     Add a date selection column.

+ 239 - 68
django/db/models/sql/query.py

@@ -12,12 +12,13 @@ from copy import deepcopy
 from django.utils.tree import Node
 from django.utils.datastructures import SortedDict
 from django.utils.encoding import force_unicode
+from django.db.backends.util import truncate_name
 from django.db import connection
 from django.db.models import signals
 from django.db.models.fields import FieldDoesNotExist
 from django.db.models.query_utils import select_related_descend
+from django.db.models.sql import aggregates as base_aggregates_module
 from django.db.models.sql.where import WhereNode, Constraint, EverythingNode, AND, OR
-from django.db.models.sql.datastructures import Count
 from django.core.exceptions import FieldError
 from datastructures import EmptyResultSet, Empty, MultiJoin
 from constants import *
@@ -40,6 +41,7 @@ class BaseQuery(object):
 
     alias_prefix = 'T'
     query_terms = QUERY_TERMS
+    aggregates_module = base_aggregates_module
 
     def __init__(self, model, connection, where=WhereNode):
         self.model = model
@@ -73,6 +75,9 @@ class BaseQuery(object):
         self.select_related = False
         self.related_select_cols = []
 
+        # SQL aggregate-related attributes
+        self.aggregate_select = SortedDict() # Maps alias -> SQL aggregate function
+
         # Arbitrary maximum limit for select_related. Prevents infinite
         # recursion. Can be changed by the depth parameter to select_related().
         self.max_depth = 5
@@ -178,6 +183,7 @@ class BaseQuery(object):
         obj.distinct = self.distinct
         obj.select_related = self.select_related
         obj.related_select_cols = []
+        obj.aggregate_select = self.aggregate_select.copy()
         obj.max_depth = self.max_depth
         obj.extra_select = self.extra_select.copy()
         obj.extra_tables = self.extra_tables
@@ -194,6 +200,35 @@ class BaseQuery(object):
             obj._setup_query()
         return obj
 
+    def convert_values(self, value, field):
+        """Convert the database-returned value into a type that is consistent
+        across database backends.
+
+        By default, this defers to the underlying backend operations, but
+        it can be overridden by Query classes for specific backends.
+        """
+        return self.connection.ops.convert_values(value, field)
+
+    def resolve_aggregate(self, value, aggregate):
+        """Resolve the value of aggregates returned by the database to
+        consistent (and reasonable) types.
+
+        This is required because of the predisposition of certain backends
+        to return Decimal and long types when they are not needed.
+        """
+        if value is None:
+            # Return None as-is
+            return value
+        elif aggregate.is_ordinal:
+            # Any ordinal aggregate (e.g., count) returns an int
+            return int(value)
+        elif aggregate.is_computed:
+            # Any computed aggregate (e.g., avg) returns a float
+            return float(value)
+        else:
+            # Return value depends on the type of the field being processed.
+            return self.convert_values(value, aggregate.field)
+
     def results_iter(self):
         """
         Returns an iterator over the results from executing this query.
@@ -212,29 +247,78 @@ class BaseQuery(object):
                         else:
                             fields = self.model._meta.fields
                     row = self.resolve_columns(row, fields)
+
+                if self.aggregate_select:
+                    aggregate_start = len(self.extra_select.keys()) + len(self.select)
+                    row = tuple(row[:aggregate_start]) + tuple([
+                        self.resolve_aggregate(value, aggregate)
+                        for (alias, aggregate), value
+                        in zip(self.aggregate_select.items(), row[aggregate_start:])
+                    ])
+
                 yield row
 
+    def get_aggregation(self):
+        """
+        Returns the dictionary with the values of the existing aggregations.
+        """
+        if not self.aggregate_select:
+            return {}
+
+        # If there is a group by clause, aggregating does not add useful
+        # information but retrieves only the first row. Aggregate
+        # over the subquery instead.
+        if self.group_by:
+            from subqueries import AggregateQuery
+            query = AggregateQuery(self.model, self.connection)
+
+            obj = self.clone()
+
+            # Remove any aggregates marked for reduction from the subquery
+            # and move them to the outer AggregateQuery.
+            for alias, aggregate in self.aggregate_select.items():
+                if aggregate.is_summary:
+                    query.aggregate_select[alias] = aggregate
+                    del obj.aggregate_select[alias]
+
+            query.add_subquery(obj)
+        else:
+            query = self
+            self.select = []
+            self.default_cols = False
+            self.extra_select = {}
+
+        query.clear_ordering(True)
+        query.clear_limits()
+        query.select_related = False
+        query.related_select_cols = []
+        query.related_select_fields = []
+
+        return dict([
+            (alias, self.resolve_aggregate(val, aggregate))
+            for (alias, aggregate), val
+            in zip(query.aggregate_select.items(), query.execute_sql(SINGLE))
+        ])
+
     def get_count(self):
         """
         Performs a COUNT() query using the current filter constraints.
         """
-        from subqueries import CountQuery
         obj = self.clone()
-        obj.clear_ordering(True)
-        obj.clear_limits()
-        obj.select_related = False
-        obj.related_select_cols = []
-        obj.related_select_fields = []
-        if len(obj.select) > 1:
-            obj = self.clone(CountQuery, _query=obj, where=self.where_class(),
-                    distinct=False)
-            obj.select = []
-            obj.extra_select = SortedDict()
+        if len(self.select) > 1:
+            # If a select clause exists, then the query has already started to
+            # specify the columns that are to be returned.
+            # In this case, we need to use a subquery to evaluate the count.
+            from subqueries import AggregateQuery
+            subquery = obj
+            subquery.clear_ordering(True)
+            subquery.clear_limits()
+
+            obj = AggregateQuery(obj.model, obj.connection)
+            obj.add_subquery(subquery)
+
         obj.add_count_column()
-        data = obj.execute_sql(SINGLE)
-        if not data:
-            return 0
-        number = data[0]
+        number = obj.get_aggregation()[None]
 
         # Apply offset and limit constraints manually, since using LIMIT/OFFSET
         # in SQL (in variants that provide them) doesn't change the COUNT
@@ -450,25 +534,41 @@ class BaseQuery(object):
             for col in self.select:
                 if isinstance(col, (list, tuple)):
                     r = '%s.%s' % (qn(col[0]), qn(col[1]))
-                    if with_aliases and col[1] in col_aliases:
-                        c_alias = 'Col%d' % len(col_aliases)
-                        result.append('%s AS %s' % (r, c_alias))
-                        aliases.add(c_alias)
-                        col_aliases.add(c_alias)
+                    if with_aliases:
+                        if col[1] in col_aliases:
+                            c_alias = 'Col%d' % len(col_aliases)
+                            result.append('%s AS %s' % (r, c_alias))
+                            aliases.add(c_alias)
+                            col_aliases.add(c_alias)
+                        else:
+                            result.append('%s AS %s' % (r, col[1]))
+                            aliases.add(r)
+                            col_aliases.add(col[1])
                     else:
                         result.append(r)
                         aliases.add(r)
                         col_aliases.add(col[1])
                 else:
                     result.append(col.as_sql(quote_func=qn))
+
                     if hasattr(col, 'alias'):
                         aliases.add(col.alias)
                         col_aliases.add(col.alias)
+
         elif self.default_cols:
             cols, new_aliases = self.get_default_columns(with_aliases,
                     col_aliases)
             result.extend(cols)
             aliases.update(new_aliases)
+
+        result.extend([
+            '%s%s' % (
+                aggregate.as_sql(quote_func=qn),
+                alias is not None and ' AS %s' % qn(alias) or ''
+            )
+            for alias, aggregate in self.aggregate_select.items()
+        ])
+
         for table, col in self.related_select_cols:
             r = '%s.%s' % (qn(table), qn(col))
             if with_aliases and col in col_aliases:
@@ -538,7 +638,7 @@ class BaseQuery(object):
         Returns a list of strings that are joined together to go after the
         "FROM" part of the query, as well as a list any extra parameters that
         need to be included. Sub-classes, can override this to create a
-        from-clause via a "select", for example (e.g. CountQuery).
+        from-clause via a "select".
 
         This should only be called after any SQL construction methods that
         might change the tables we need. This means the select columns and
@@ -635,10 +735,13 @@ class BaseQuery(object):
                     order = asc
                 result.append('%s %s' % (field, order))
                 continue
+            col, order = get_order_dir(field, asc)
+            if col in self.aggregate_select:
+                result.append('%s %s' % (col, order))
+                continue
             if '.' in field:
                 # This came in through an extra(order_by=...) addition. Pass it
                 # on verbatim.
-                col, order = get_order_dir(field, asc)
                 table, col = col.split('.', 1)
                 if (table, col) not in processed_pairs:
                     elt = '%s.%s' % (qn(table), col)
@@ -657,7 +760,6 @@ class BaseQuery(object):
                             ordering_aliases.append(elt)
                         result.append('%s %s' % (elt, order))
             else:
-                col, order = get_order_dir(field, asc)
                 elt = qn2(col)
                 if distinct and col not in select_aliases:
                     ordering_aliases.append(elt)
@@ -1068,6 +1170,48 @@ class BaseQuery(object):
             self.fill_related_selections(f.rel.to._meta, alias, cur_depth + 1,
                     used, next, restricted, new_nullable, dupe_set, avoid)
 
+    def add_aggregate(self, aggregate, model, alias, is_summary):
+        """
+        Adds a single aggregate expression to the Query
+        """
+        opts = model._meta
+        field_list = aggregate.lookup.split(LOOKUP_SEP)
+        if (len(field_list) == 1 and
+            aggregate.lookup in self.aggregate_select.keys()):
+            # Aggregate is over an annotation
+            field_name = field_list[0]
+            col = field_name
+            source = self.aggregate_select[field_name]
+        elif (len(field_list) > 1 or
+            field_list[0] not in [i.name for i in opts.fields]):
+            field, source, opts, join_list, last, _ = self.setup_joins(
+                field_list, opts, self.get_initial_alias(), False)
+
+            # Process the join chain to see if it can be trimmed
+            _, _, col, _, join_list = self.trim_joins(source, join_list, last, False)
+
+            # If the aggregate references a model or field that requires a join,
+            # those joins must be LEFT OUTER - empty join rows must be returned
+            # in order for zeros to be returned for those aggregates.
+            for column_alias in join_list:
+                self.promote_alias(column_alias, unconditional=True)
+
+            col = (join_list[-1], col)
+        else:
+            # Aggregate references a normal field
+            field_name = field_list[0]
+            source = opts.get_field(field_name)
+            if not (self.group_by and is_summary):
+                # Only use a column alias if this is a
+                # standalone aggregate, or an annotation
+                col = (opts.db_table, source.column)
+            else:
+                col = field_name
+
+        # Add the aggregate to the query
+        alias = truncate_name(alias, self.connection.ops.max_name_length())
+        aggregate.add_to_query(self, alias, col=col, source=source, is_summary=is_summary)
+
     def add_filter(self, filter_expr, connector=AND, negate=False, trim=False,
             can_reuse=None, process_extras=True):
         """
@@ -1119,6 +1263,11 @@ class BaseQuery(object):
         elif callable(value):
             value = value()
 
+        for alias, aggregate in self.aggregate_select.items():
+            if alias == parts[0]:
+                self.having.add((aggregate, lookup_type, value), AND)
+                return
+
         opts = self.get_meta()
         alias = self.get_initial_alias()
         allow_many = trim or not negate
@@ -1131,38 +1280,9 @@ class BaseQuery(object):
             self.split_exclude(filter_expr, LOOKUP_SEP.join(parts[:e.level]),
                     can_reuse)
             return
-        final = len(join_list)
-        penultimate = last.pop()
-        if penultimate == final:
-            penultimate = last.pop()
-        if trim and len(join_list) > 1:
-            extra = join_list[penultimate:]
-            join_list = join_list[:penultimate]
-            final = penultimate
-            penultimate = last.pop()
-            col = self.alias_map[extra[0]][LHS_JOIN_COL]
-            for alias in extra:
-                self.unref_alias(alias)
-        else:
-            col = target.column
-        alias = join_list[-1]
 
-        while final > 1:
-            # An optimization: if the final join is against the same column as
-            # we are comparing against, we can go back one step in the join
-            # chain and compare against the lhs of the join instead (and then
-            # repeat the optimization). The result, potentially, involves less
-            # table joins.
-            join = self.alias_map[alias]
-            if col != join[RHS_JOIN_COL]:
-                break
-            self.unref_alias(alias)
-            alias = join[LHS_ALIAS]
-            col = join[LHS_JOIN_COL]
-            join_list = join_list[:-1]
-            final -= 1
-            if final == penultimate:
-                penultimate = last.pop()
+        # Process the join chain to see if it can be trimmed
+        final, penultimate, col, alias, join_list = self.trim_joins(target, join_list, last, trim)
 
         if (lookup_type == 'isnull' and value is True and not negate and
                 final > 1):
@@ -1313,7 +1433,7 @@ class BaseQuery(object):
                         field, model, direct, m2m = opts.get_field_by_name(f.name)
                         break
                 else:
-                    names = opts.get_all_field_names()
+                    names = opts.get_all_field_names() + self.aggregate_select.keys()
                     raise FieldError("Cannot resolve keyword %r into field. "
                             "Choices are: %s" % (name, ", ".join(names)))
 
@@ -1462,6 +1582,43 @@ class BaseQuery(object):
 
         return field, target, opts, joins, last, extra_filters
 
+    def trim_joins(self, target, join_list, last, trim):
+        """An optimization: if the final join is against the same column as
+        we are comparing against, we can go back one step in a join
+        chain and compare against the LHS of the join instead (and then
+        repeat the optimization). The result, potentially, involves less
+        table joins.
+
+        Returns a tuple
+        """
+        final = len(join_list)
+        penultimate = last.pop()
+        if penultimate == final:
+            penultimate = last.pop()
+        if trim and len(join_list) > 1:
+            extra = join_list[penultimate:]
+            join_list = join_list[:penultimate]
+            final = penultimate
+            penultimate = last.pop()
+            col = self.alias_map[extra[0]][LHS_JOIN_COL]
+            for alias in extra:
+                self.unref_alias(alias)
+        else:
+            col = target.column
+        alias = join_list[-1]
+        while final > 1:
+            join = self.alias_map[alias]
+            if col != join[RHS_JOIN_COL]:
+                break
+            self.unref_alias(alias)
+            alias = join[LHS_ALIAS]
+            col = join[LHS_JOIN_COL]
+            join_list = join_list[:-1]
+            final -= 1
+            if final == penultimate:
+                penultimate = last.pop()
+        return final, penultimate, col, alias, join_list
+
     def update_dupe_avoidance(self, opts, col, alias):
         """
         For a column that is one of multiple pointing to the same table, update
@@ -1554,6 +1711,7 @@ class BaseQuery(object):
         """
         alias = self.get_initial_alias()
         opts = self.get_meta()
+
         try:
             for name in field_names:
                 field, target, u2, joins, u3, u4 = self.setup_joins(
@@ -1574,7 +1732,7 @@ class BaseQuery(object):
         except MultiJoin:
             raise FieldError("Invalid field name: '%s'" % name)
         except FieldError:
-            names = opts.get_all_field_names() + self.extra_select.keys()
+            names = opts.get_all_field_names() + self.extra_select.keys() + self.aggregate_select.keys()
             names.sort()
             raise FieldError("Cannot resolve keyword %r into field. "
                     "Choices are: %s" % (name, ", ".join(names)))
@@ -1609,38 +1767,52 @@ class BaseQuery(object):
         if force_empty:
             self.default_ordering = False
 
+    def set_group_by(self):
+        """
+        Expands the GROUP BY clause required by the query.
+
+        This will usually be the set of all non-aggregate fields in the
+        return data. If the database backend supports grouping by the
+        primary key, and the query would be equivalent, the optimization
+        will be made automatically.
+        """
+        if self.connection.features.allows_group_by_pk:
+            if len(self.select) == len(self.model._meta.fields):
+                self.group_by.append('.'.join([self.model._meta.db_table,
+                                               self.model._meta.pk.column]))
+                return
+
+        for sel in self.select:
+            self.group_by.append(sel)
+
     def add_count_column(self):
         """
         Converts the query to do count(...) or count(distinct(pk)) in order to
         get its size.
         """
-        # TODO: When group_by support is added, this needs to be adjusted so
-        # that it doesn't totally overwrite the select list.
         if not self.distinct:
             if not self.select:
-                select = Count()
+                count = self.aggregates_module.Count('*', is_summary=True)
             else:
                 assert len(self.select) == 1, \
                         "Cannot add count col with multiple cols in 'select': %r" % self.select
-                select = Count(self.select[0])
+                count = self.aggregates_module.Count(self.select[0])
         else:
             opts = self.model._meta
             if not self.select:
-                select = Count((self.join((None, opts.db_table, None, None)),
-                        opts.pk.column), True)
+                count = self.aggregates_module.Count((self.join((None, opts.db_table, None, None)), opts.pk.column),
+                                         is_summary=True, distinct=True)
             else:
                 # Because of SQL portability issues, multi-column, distinct
                 # counts need a sub-query -- see get_count() for details.
                 assert len(self.select) == 1, \
                         "Cannot add count col with multiple cols in 'select'."
-                select = Count(self.select[0], True)
 
+                count = self.aggregates_module.Count(self.select[0], distinct=True)
             # Distinct handling is done in Count(), so don't do it at this
             # level.
             self.distinct = False
-        self.select = [select]
-        self.select_fields = [None]
-        self.extra_select = {}
+        self.aggregate_select = {None: count}
 
     def add_select_related(self, fields):
         """
@@ -1758,7 +1930,6 @@ class BaseQuery(object):
                 return empty_iter()
             else:
                 return
-
         cursor = self.connection.cursor()
         cursor.execute(sql, params)
 

+ 20 - 10
django/db/models/sql/subqueries.py

@@ -9,7 +9,7 @@ from django.db.models.sql.query import Query
 from django.db.models.sql.where import AND, Constraint
 
 __all__ = ['DeleteQuery', 'UpdateQuery', 'InsertQuery', 'DateQuery',
-        'CountQuery']
+        'AggregateQuery']
 
 class DeleteQuery(Query):
     """
@@ -400,15 +400,25 @@ class DateQuery(Query):
         self.distinct = True
         self.order_by = order == 'ASC' and [1] or [-1]
 
-class CountQuery(Query):
+class AggregateQuery(Query):
     """
-    A CountQuery knows how to take a normal query which would select over
-    multiple distinct columns and turn it into SQL that can be used on a
-    variety of backends (it requires a select in the FROM clause).
+    An AggregateQuery takes another query as a parameter to the FROM
+    clause and only selects the elements in the provided list.
     """
-    def get_from_clause(self):
-        result, params = self._query.as_sql()
-        return ['(%s) A1' % result], params
+    def add_subquery(self, query):
+        self.subquery, self.sub_params = query.as_sql(with_col_aliases=True)
 
-    def get_ordering(self):
-        return ()
+    def as_sql(self, quote_func=None):
+        """
+        Creates the SQL for this query. Returns the SQL string and list of
+        parameters.
+        """
+        sql = ('SELECT %s FROM (%s) subquery' % (
+            ', '.join([
+                aggregate.as_sql()
+                for aggregate in self.aggregate_select.values()
+            ]),
+            self.subquery)
+        )
+        params = self.sub_params
+        return (sql, params)

+ 15 - 10
django/test/testcases.py

@@ -14,6 +14,7 @@ from django.test.client import Client
 from django.utils import simplejson
 
 normalize_long_ints = lambda s: re.sub(r'(?<![\w])(\d+)L(?![\w])', '\\1', s)
+normalize_decimals = lambda s: re.sub(r"Decimal\('(\d+(\.\d*)?)'\)", lambda m: "Decimal(\"%s\")" % m.groups()[0], s)
 
 def to_list(value):
     """
@@ -31,7 +32,7 @@ class OutputChecker(doctest.OutputChecker):
     def check_output(self, want, got, optionflags):
         "The entry method for doctest output checking. Defers to a sequence of child checkers"
         checks = (self.check_output_default,
-                  self.check_output_long,
+                  self.check_output_numeric,
                   self.check_output_xml,
                   self.check_output_json)
         for check in checks:
@@ -43,19 +44,23 @@ class OutputChecker(doctest.OutputChecker):
         "The default comparator provided by doctest - not perfect, but good for most purposes"
         return doctest.OutputChecker.check_output(self, want, got, optionflags)
 
-    def check_output_long(self, want, got, optionflags):
-        """Doctest does an exact string comparison of output, which means long
-        integers aren't equal to normal integers ("22L" vs. "22"). The
-        following code normalizes long integers so that they equal normal
-        integers.
+    def check_output_numeric(self, want, got, optionflags):
+        """Doctest does an exact string comparison of output, which means that
+        some numerically equivalent values aren't equal. This check normalizes
+         * long integers (22L) so that they equal normal integers (22).
+         * Decimals so that they are comparable, regardless of the change
+           made to __repr__ in Python 2.6.
         """
-        return normalize_long_ints(want) == normalize_long_ints(got)
+        return doctest.OutputChecker.check_output(self,
+            normalize_decimals(normalize_long_ints(want)),
+            normalize_decimals(normalize_long_ints(got)),
+            optionflags)
 
     def check_output_xml(self, want, got, optionsflags):
         """Tries to do a 'xml-comparision' of want and got.  Plain string
         comparision doesn't always work because, for example, attribute
         ordering should not be important.
-        
+
         Based on http://codespeak.net/svn/lxml/trunk/src/lxml/doctestcompare.py
         """
         _norm_whitespace_re = re.compile(r'[ \t\n][ \t\n]+')
@@ -102,7 +107,7 @@ class OutputChecker(doctest.OutputChecker):
             wrapper = '<root>%s</root>'
             want = wrapper % want
             got = wrapper % got
-            
+
         # Parse the want and got strings, and compare the parsings.
         try:
             want_root = parseString(want).firstChild
@@ -174,7 +179,7 @@ class TestCase(unittest.TestCase):
         """Performs any pre-test setup. This includes:
 
             * Flushing the database.
-            * If the Test Case class has a 'fixtures' member, installing the 
+            * If the Test Case class has a 'fixtures' member, installing the
               named fixtures.
             * If the Test Case class has a 'urls' member, replace the
               ROOT_URLCONF with it.

+ 1 - 1
docs/index.txt

@@ -42,7 +42,7 @@ The model layer
     * **Models:** :ref:`Model syntax <topics-db-models>` | :ref:`Field types <ref-models-fields>` | :ref:`Meta options <ref-models-options>`
     * **QuerySets:** :ref:`Executing queries <topics-db-queries>` | :ref:`QuerySet method reference <ref-models-querysets>`
     * **Model instances:** :ref:`Instance methods <ref-models-instances>` | :ref:`Accessing related objects <ref-models-relations>`
-    * **Advanced:** :ref:`Managers <topics-db-managers>` | :ref:`Raw SQL <topics-db-sql>` | :ref:`Transactions <topics-db-transactions>` | :ref:`Custom fields <howto-custom-model-fields>`
+    * **Advanced:** :ref:`Managers <topics-db-managers>` | :ref:`Raw SQL <topics-db-sql>` | :ref:`Transactions <topics-db-transactions>` | :ref:`Aggregation <topics-db-aggregation>` | :ref:`Custom fields <howto-custom-model-fields>`
     * **Other:** :ref:`Supported databases <ref-databases>` | :ref:`Legacy databases <howto-legacy-databases>` | :ref:`Providing initial data <howto-initial-data>`
 
 The template layer

+ 1 - 1
docs/ref/models/index.txt

@@ -7,7 +7,7 @@ Model API reference. For introductory material, see :ref:`topics-db-models`.
 
 .. toctree::
    :maxdepth: 1
-   
+
    fields
    relations
    options

+ 186 - 0
docs/ref/models/querysets.txt

@@ -158,6 +158,48 @@ In SQL terms, that evaluates to::
 
 Note the second example is more restrictive.
 
+``annotate(*args, **kwargs)``
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. versionadded:: 1.1
+
+Annotates each object in the ``QuerySet`` with the provided list of
+aggregate values (averages, sums, etc) that have been computed over
+the objects that are related to the objects in the ``QuerySet``.
+Each argument to ``annotate()`` is an annotation that will be added
+to each object in the ``QuerySet`` that is returned.
+
+The aggregation functions that are provided by Django are described
+in `Aggregation Functions`_ below.
+
+Annotations specified using keyword arguments will use the keyword as
+the alias for the annotation. Anonymous arguments will have an alias
+generated for them based upon the name of the aggregate function and
+the model field that is being aggregated.
+
+For example, if you were manipulating a list of blogs, you may want
+to determine how many entries have been made in each blog::
+
+    >>> q = Blog.objects.annotate(Count('entry'))
+    # The name of the first blog
+    >>> q[0].name
+    'Blogasaurus'
+    # The number of entries on the first blog
+    >>> q[0].entry__count
+    42
+
+The ``Blog`` model doesn't define an ``entry__count`` attribute by itself,
+but by using a keyword argument to specify the aggregate function, you can
+control the name of the annotation::
+
+    >>> q = Blog.objects.annotate(number_of_entries=Count('entry'))
+    # The number of entries on the first blog, using the name provided
+    >>> q[0].number_of_entries
+    42
+
+For an in-depth discussion of aggregation, see :ref:`the topic guide on
+Aggregation <topics-db-aggregation>`.
+
 ``order_by(*fields)``
 ~~~~~~~~~~~~~~~~~~~~~
 
@@ -931,6 +973,38 @@ exist with the given parameters.
 
 Note ``latest()`` exists purely for convenience and readability.
 
+``aggregate(*args, **kwargs)``
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. versionadded:: 1.1
+
+Returns a dictionary of aggregate values (averages, sums, etc) calculated
+over the ``QuerySet``. Each argument to ``aggregate()`` specifies
+a value that will be included in the dictionary that is returned.
+
+The aggregation functions that are provided by Django are described
+in `Aggregation Functions`_ below.
+
+Aggregates specified using keyword arguments will use the keyword as
+the name for the annotation. Anonymous arguments will have a name
+generated for them based upon the name of the aggregate function and
+the model field that is being aggregated.
+
+For example, if you were manipulating blog entries, you may want to know
+the average number of authors contributing to blog entries::
+
+    >>> Blog.objects.aggregate(Count('entry'))
+    {'entry__count': 16}
+
+By using a keyword argument to specify the aggregate function, you can
+control the name of the aggregation value that is returned::
+
+    >>> Blog.objects.aggregate(number_of_entries=Count('entry'))
+    {'number_of_entries': 16}
+
+For an in-depth discussion of aggregation, see :ref:`the topic guide on
+Aggregation <topics-db-aggregation>`.
+
 .. _field-lookups:
 
 Field lookups
@@ -1326,3 +1400,115 @@ SQL equivalents::
 
     SELECT ... WHERE title REGEXP '(?i)^(an?|the) +'; -- SQLite
 
+.. _aggregation-functions:
+
+Aggregation Functions
+---------------------
+
+.. versionadded:: 1.1
+
+Django provides the following aggregation functions in the
+``django.db.models`` module.
+
+``Avg``
+~~~~~~~
+
+.. class:: Avg(field)
+
+Returns the mean value of the given field.
+
+    * Default alias: ``<field>__avg``
+    * Return type: float
+
+``Count``
+~~~~~~~~~
+
+.. class:: Count(field, distinct=False)
+
+Returns the number of objects that are related through the provided field.
+
+    * Default alias: ``<field>__count``
+    * Return type: integer
+
+Has one optional argument:
+
+.. attribute:: distinct
+
+    If distinct=True, the count will only include unique instances. This has
+    the SQL equivalent of ``COUNT(DISTINCT field)``. Default value is ``False``.
+
+``Max``
+~~~~~~~
+
+.. class:: Max(field)
+
+Returns the maximum value of the given field.
+
+    * Default alias: ``<field>__max``
+    * Return type: same as input field
+
+``Min``
+~~~~~~~
+
+.. class:: Min(field)
+
+Returns the minimum value of the given field.
+
+    * Default alias: ``<field>__min``
+    * Return type: same as input field
+
+``StdDev``
+~~~~~~~~~~
+
+.. class:: StdDev(field, sample=False)
+
+Returns the standard deviation of the data in the provided field.
+
+    * Default alias: ``<field>__stddev``
+    * Return type: float
+
+Has one optional argument:
+
+.. attribute:: sample
+
+    By default, ``StdDev`` returns the population standard deviation. However,
+    if ``sample=True``, the return value will be the sample standard deviation.
+
+.. admonition:: SQLite
+
+    SQLite doesn't provide ``StdDev`` out of the box. An implementation is
+    available as an extension module for SQLite. Consult the SQLite
+    documentation for instructions on obtaining and installing this extension.
+
+``Sum``
+~~~~~~~
+
+.. class:: Sum(field)
+
+Computes the sum of all values of the given field.
+
+    * Default alias: ``<field>__sum``
+    * Return type: same as input field
+
+``Variance``
+~~~~~~~~~~~~
+
+.. class:: Variance(field, sample=False)
+
+Returns the variance of the data in the provided field.
+
+    * Default alias: ``<field>__variance``
+    * Return type: float
+
+Has one optional argument:
+
+.. attribute:: sample
+
+    By default, ``Variance`` returns the population variance. However,
+    if ``sample=True``, the return value will be the sample variance.
+
+.. admonition:: SQLite
+
+    SQLite doesn't provide ``Variance`` out of the box. An implementation is
+    available as an extension module for SQLite. Consult the SQlite
+    documentation for instructions on obtaining and installing this extension.

+ 323 - 0
docs/topics/db/aggregation.txt

@@ -0,0 +1,323 @@
+.. _topics-db-aggregation:
+
+=============
+ Aggregation
+=============
+
+.. versionadded:: 1.1
+
+.. currentmodule:: django.db.models
+
+The topic guide on :ref:`Django's database-abstraction API <topics-db-queries>`
+described the way that you can use Django queries that create,
+retrieve, update and delete individual objects. However, sometimes you will
+need to retrieve values that are derived by summarizing or *aggregating* a
+collection of objects. This topic guide describes the ways that aggregate values
+can be generated and returned using Django queries.
+
+Throughout this guide, we'll refer to the following models. These models are
+used to track the inventory for a series of online bookstores:
+
+.. _queryset-model-example:
+
+.. code-block:: python
+
+    class Author(models.Model):
+       name = models.CharField(max_length=100)
+       age = models.IntegerField()
+       friends = models.ManyToManyField('self', blank=True)
+
+    class Publisher(models.Model):
+       name = models.CharField(max_length=300)
+       num_awards = models.IntegerField()
+
+    class Book(models.Model):
+       isbn = models.CharField(max_length=9)
+       name = models.CharField(max_length=300)
+       pages = models.IntegerField()
+       price = models.DecimalField(max_digits=10, decimal_places=2)
+       rating = models.FloatField()
+       authors = models.ManyToManyField(Author)
+       publisher = models.ForeignKey(Publisher)
+       pubdate = models.DateField()
+
+    class Store(models.Model):
+       name = models.CharField(max_length=300)
+       books = models.ManyToManyField(Book)
+
+
+Generating aggregates over a QuerySet
+=====================================
+
+Django provides two ways to generate aggregates. The first way is to generate
+summary values over an entire ``QuerySet``. For example, say you wanted to
+calculate the average price of all books available for sale. Django's query
+syntax provides a means for describing the set of all books::
+
+    >>> Book.objects.all()
+
+What we need is a way to calculate summary values over the objects that
+belong to this ``QuerySet``. This is done by appending an ``aggregate()``
+clause onto the ``QuerySet``::
+
+    >>> from django.db.models import Avg
+    >>> Book.objects.all().aggregate(Avg('price'))
+    {'price__avg': 34.35}
+
+The ``all()`` is redundant in this example, so this could be simplified to::
+
+    >>> Book.objects.aggregate(Avg('price'))
+    {'price__avg': 34.35}
+
+The argument to the ``aggregate()`` clause describes the aggregate value that
+we want to compute - in this case, the average of the ``price`` field on the
+``Book`` model. A list of the aggregate functions that are available can be
+found in the :ref:`QuerySet reference <aggregation-functions>`.
+
+``aggregate()`` is a terminal clause for a ``QuerySet`` that, when invoked,
+returns a dictionary of name-value pairs. The name is an identifier for the
+aggregate value; the value is the computed aggregate. The name is
+automatically generated from the name of the field and the aggregate function.
+If you want to manually specify a name for the aggregate value, you can do so
+by providing that name when you specify the aggregate clause::
+
+    >>> Book.objects.aggregate(average_price=Avg('price'))
+    {'average_price': 34.35}
+
+If you want to generate more than one aggregate, you just add another
+argument to the ``aggregate()`` clause. So, if we also wanted to know
+the maximum and minimum price of all books, we would issue the query::
+
+    >>> Book.objects.aggregate(Avg('price'), Max('price'), Min('price'))
+    {'price__avg': 34.35, 'price__max': Decimal('81.20'), 'price__min': Decimal('12.99')}
+
+Generating aggregates for each item in a QuerySet
+=================================================
+
+The second way to generate summary values is to generate an independent
+summary for each object in a ``QuerySet``. For example, if you are retrieving
+a list of books, you may want to know how many authors contributed to
+each book. Each Book has a many-to-many relationship with the Author; we
+want to summarize this relationship for each book in the ``QuerySet``.
+
+Per-object summaries can be generated using the ``annotate()`` clause.
+When an ``annotate()`` clause is specified, each object in the ``QuerySet``
+will be annotated with the specified values.
+
+The syntax for these annotations is identical to that used for the
+``aggregate()`` clause. Each argument to ``annotate()`` describes an
+aggregate that is to be calculated. For example, to annotate Books with
+the number of authors::
+
+    # Build an annotated queryset
+    >>> q = Book.objects.annotate(Count('authors'))
+    # Interrogate the first object in the queryset
+    >>> q[0]
+    <Book: The Definitive Guide to Django>
+    >>> q[0].authors__count
+    2
+    # Interrogate the second object in the queryset
+    >>> q[1]
+    <Book: Practical Django Projects>
+    >>> q[1].authors__count
+    1
+
+As with ``aggregate()``, the name for the annotation is automatically derived
+from the name of the aggregate function and the name of the field being
+aggregated. You can override this default name by providing an alias when you
+specify the annotation::
+
+    >>> q = Book.objects.annotate(num_authors=Count('authors'))
+    >>> q[0].num_authors
+    2
+    >>> q[1].num_authors
+    1
+
+Unlike ``aggregate()``, ``annotate()`` is *not* a terminal clause. The output
+of the ``annotate()`` clause is a ``QuerySet``; this ``QuerySet`` can be
+modified using any other ``QuerySet`` operation, including ``filter()``,
+``order_by()``, or even additional calls to ``annotate()``.
+
+Joins and aggregates
+====================
+
+So far, we have dealt with aggregates over fields that belong to the
+model being queried. However, sometimes the value you want to aggregate
+will belong to a model that is related to the model you are querying.
+
+When specifying the field to be aggregated in an aggregate function,
+Django will allow you to use the same
+:ref:`double underscore notation <field-lookups-intro>` that is used
+when referring to related fields in filters. Django will then handle
+any table joins that are required to retrieve and aggregate the
+related value.
+
+For example, to find the price range of books offered in each store,
+you could use the annotation::
+
+    >>> Store.objects.annotate(min_price=Min('books__price'), max_price=Max('books__price'))
+
+This tells Django to retrieve the Store model, join (through the
+many-to-many relationship) with the Book model, and aggregate on the
+price field of the book model to produce a minimum and maximum value.
+
+The same rules apply to the ``aggregate()`` clause. If you wanted to
+know the lowest and highest price of any book that is available for sale
+in a store, you could use the aggregate::
+
+    >>> Store.objects.aggregate(min_price=Min('books__price'), max_price=Max('books__price'))
+
+Join chains can be as deep as you require. For example, to extract the
+age of the youngest author of any book available for sale, you could
+issue the query::
+
+    >>> Store.objects.aggregate(youngest_age=Min('books__authors__age'))
+
+Aggregations and other QuerySet clauses
+=======================================
+
+``filter()`` and ``exclude()``
+------------------------------
+
+Aggregates can also participate in filters. Any ``filter()`` (or
+``exclude()``) applied to normal model fields will have the effect of
+constraining the objects that are considered for aggregation.
+
+When used with an ``annotate()`` clause, a filter has the effect of
+constraining the objects for which an annotation is calculated. For example,
+you can generate an annotated list of all books that have a title starting
+with "Django" using the query::
+
+    >>> Book.objects.filter(name__startswith="Django").annotate(num_authors=Count('authors'))
+
+When used with an ``aggregate()`` clause, a filter has the effect of
+constraining the objects over which the aggregate is calculated.
+For example, you can generate the average price of all books with a
+title that starts with "Django" using the query::
+
+    >>> Book.objects.filter(name__startswith="Django").aggregate(Avg('price'))
+
+Filtering on annotations
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+Annotated values can also be filtered. The alias for the annotation can be
+used in ``filter()`` and ``exclude()`` clauses in the same way as any other
+model field.
+
+For example, to generate a list of books that have more than one author,
+you can issue the query::
+
+    >>> Book.objects.annotate(num_authors=Count('authors')).filter(num_authors__gt=1)
+
+This query generates an annotated result set, and then generates a filter
+based upon that annotation.
+
+Order of ``annotate()`` and ``filter()`` clauses
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+When developing a complex query that involves both ``annotate()`` and
+``filter()`` clauses, particular attention should be paid to the order
+in which the clauses are applied to the ``QuerySet``.
+
+When an ``annotate()`` clause is applied to a query, the annotation is
+computed over the state of the query up to the point where the annotation
+is requested. The practical implication of this is that ``filter()`` and
+``annotate()`` are not commutative operations -- that is, there is a
+difference between the query::
+
+    >>> Publisher.objects.annotate(num_books=Count('book')).filter(book__rating__gt=3.0)
+
+and the query::
+
+    >>> Publisher.objects.filter(book__rating__gt=3.0).annotate(num_books=Count('book'))
+
+Both queries will return a list of Publishers that have at least one good
+book (i.e., a book with a rating exceeding 3.0). However, the annotation in
+the first query will provide the total number of all books published by the
+publisher; the second query will only include good books in the annotated
+count. In the first query, the annotation precedes the filter, so the
+filter has no effect on the annotation. In the second query, the filter
+precedes the annotation, and as a result, the filter constrains the objects
+considered when calculating the annotation.
+
+``order_by()``
+--------------
+
+Annotations can be used as a basis for ordering. When you
+define an ``order_by()`` clause, the aggregates you provide can reference
+any alias defined as part of an ``annotate()`` clause in the query.
+
+For example, to order a ``QuerySet`` of books by the number of authors
+that have contributed to the book, you could use the following query::
+
+    >>> Book.objects.annotate(num_authors=Count('authors')).order_by('num_authors')
+
+``values()``
+------------
+
+Ordinarily, annotations are generated on a per-object basis - an annotated
+``QuerySet`` will return one result for each object in the original
+``QuerySet``. However, when a ``values()`` clause is used to constrain the
+columns that are returned in the result set, the method for evaluating
+annotations is slightly different. Instead of returning an annotated result
+for each result in the original ``QuerySet``, the original results are
+grouped according to the unique combinations of the fields specified in the
+``values()`` clause. An annotation is then provided for each unique group;
+the annotation is computed over all members of the group.
+
+For example, consider an author query that attempts to find out the average
+rating of books written by each author::
+
+    >>> Author.objects.annotate(average_rating=Avg('book_rating'))
+
+This will return one result for each author in the database, annotated with
+their average book rating.
+
+However, the result will be slightly different if you use a ``values()`` clause::
+
+    >>> Author.objects.values('name').annotate(average_rating=Avg('book_rating'))
+
+In this example, the authors will be grouped by name, so you will only get
+an annotated result for each *unique* author name. This means if you have
+two authors with the same name, their results will be merged into a single
+result in the output of the query; the average will be computed as the
+average over the books written by both authors.
+
+The annotation name will be added to the fields returned
+as part of the ``ValuesQuerySet``.
+
+Order of ``annotate()`` and ``values()`` clauses
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+As with the ``filter()`` clause, the order in which ``annotate()`` and
+``values()`` clauses are applied to a query is significant. If the
+``values()`` clause precedes the ``annotate()``, the annotation will be
+computed using the grouping described by the ``values()`` clause.
+
+However, if the ``annotate()`` clause precedes the ``values()`` clause,
+the annotations will be generated over the entire query set. In this case,
+the ``values()`` clause only constrains the fields that are generated on
+output.
+
+For example, if we reverse the order of the ``values()`` and ``annotate()``
+clause from our previous example::
+
+    >>> Author.objects.annotate(average_rating=Avg('book_rating')).values('name')
+
+This will now yield one unique result for each author; however, only
+the author's name and the ``average_rating`` annotation will be returned
+in the output data.
+
+Aggregating annotations
+-----------------------
+
+You can also generate an aggregate on the result of an annotation. When you
+define an ``aggregate()`` clause, the aggregates you provide can reference
+any alias defined as part of an ``annotate()`` clause in the query.
+
+For example, if you wanted to calculate the average number of authors per
+book you first annotate the set of books with the author count, then
+aggregate that author count, referencing the annotation field::
+
+    >>> Book.objects.annotate(num_authors=Count('authors')).aggregate(Avg('num_authors'))
+    {'num_authors__avg': 1.66}

+ 1 - 0
docs/topics/db/index.txt

@@ -12,6 +12,7 @@ model maps to a single database table.
 
    models
    queries
+   aggregation
    managers
    sql
    transactions

+ 0 - 0
tests/modeltests/aggregation/__init__.py


+ 229 - 0
tests/modeltests/aggregation/fixtures/initial_data.json

@@ -0,0 +1,229 @@
+[
+    {
+        "pk": 1,
+        "model": "aggregation.publisher",
+        "fields": {
+            "name": "Apress",
+            "num_awards": 3
+        }
+    },
+    {
+        "pk": 2,
+        "model": "aggregation.publisher",
+        "fields": {
+            "name": "Sams",
+            "num_awards": 1
+        }
+    },
+    {
+        "pk": 3,
+        "model": "aggregation.publisher",
+        "fields": {
+            "name": "Prentice Hall",
+            "num_awards": 7
+        }
+    },
+    {
+        "pk": 4,
+        "model": "aggregation.publisher",
+        "fields": {
+            "name": "Morgan Kaufmann",
+            "num_awards": 9
+        }
+    },
+    {
+        "pk": 1,
+        "model": "aggregation.book",
+        "fields": {
+            "publisher": 1,
+            "isbn": "159059725",
+            "name": "The Definitive Guide to Django: Web Development Done Right",
+            "price": "30.00",
+            "rating": 4.5,
+            "authors": [1, 2],
+            "pages": 447,
+            "pubdate": "2007-12-6"
+        }
+    },
+    {
+        "pk": 2,
+        "model": "aggregation.book",
+        "fields": {
+            "publisher": 2,
+            "isbn": "067232959",
+            "name": "Sams Teach Yourself Django in 24 Hours",
+            "price": "23.09",
+            "rating": 3.0,
+            "authors": [3],
+            "pages": 528,
+            "pubdate": "2008-3-3"
+        }
+    },
+    {
+        "pk": 3,
+        "model": "aggregation.book",
+        "fields": {
+            "publisher": 1,
+            "isbn": "159059996",
+            "name": "Practical Django Projects",
+            "price": "29.69",
+            "rating": 4.0,
+            "authors": [4],
+            "pages": 300,
+            "pubdate": "2008-6-23"
+        }
+    },
+    {
+        "pk": 4,
+        "model": "aggregation.book",
+        "fields": {
+            "publisher": 3,
+            "isbn": "013235613",
+            "name": "Python Web Development with Django",
+            "price": "29.69",
+            "rating": 4.0,
+            "authors": [5, 6, 7],
+            "pages": 350,
+            "pubdate": "2008-11-3"
+        }
+    },
+    {
+        "pk": 5,
+        "model": "aggregation.book",
+        "fields": {
+            "publisher": 3,
+            "isbn": "013790395",
+            "name": "Artificial Intelligence: A Modern Approach",
+            "price": "82.80",
+            "rating": 4.0,
+            "authors": [8, 9],
+            "pages": 1132,
+            "pubdate": "1995-1-15"
+        }
+    },
+    {
+        "pk": 6,
+        "model": "aggregation.book",
+        "fields": {
+            "publisher": 4,
+            "isbn": "155860191",
+            "name": "Paradigms of Artificial Intelligence Programming: Case Studies in Common Lisp",
+            "price": "75.00",
+            "rating": 5.0,
+            "authors": [8],
+            "pages": 946,
+            "pubdate": "1991-10-15"
+        }
+    },
+    {
+        "pk": 1,
+        "model": "aggregation.store",
+        "fields": {
+            "books": [1, 2, 3, 4, 5, 6],
+            "name": "Amazon.com",
+            "original_opening": "1994-4-23 9:17:42",
+            "friday_night_closing": "23:59:59"
+        }
+    },
+    {
+        "pk": 2,
+        "model": "aggregation.store",
+        "fields": {
+            "books": [1, 3, 5, 6],
+            "name": "Books.com",
+            "original_opening": "2001-3-15 11:23:37",
+            "friday_night_closing": "23:59:59"
+        }
+    },
+    {
+        "pk": 3,
+        "model": "aggregation.store",
+        "fields": {
+            "books": [3, 4, 6],
+            "name": "Mamma and Pappa's Books",
+            "original_opening": "1945-4-25 16:24:14",
+            "friday_night_closing": "21:30:00"
+        }
+    },
+    {
+        "pk": 1,
+        "model": "aggregation.author",
+        "fields": {
+            "age": 34,
+            "friends": [2, 4],
+            "name": "Adrian Holovaty"
+        }
+    },
+    {
+        "pk": 2,
+        "model": "aggregation.author",
+        "fields": {
+            "age": 35,
+            "friends": [1, 7],
+            "name": "Jacob Kaplan-Moss"
+        }
+    },
+    {
+        "pk": 3,
+        "model": "aggregation.author",
+        "fields": {
+            "age": 45,
+            "friends": [],
+            "name": "Brad Dayley"
+        }
+    },
+    {
+        "pk": 4,
+        "model": "aggregation.author",
+        "fields": {
+            "age": 29,
+            "friends": [1],
+            "name": "James Bennett"
+        }
+    },
+    {
+        "pk": 5,
+        "model": "aggregation.author",
+        "fields": {
+            "age": 37,
+            "friends": [6, 7],
+            "name": "Jeffrey Forcier "
+        }
+    },
+    {
+        "pk": 6,
+        "model": "aggregation.author",
+        "fields": {
+            "age": 29,
+            "friends": [5, 7],
+            "name": "Paul Bissex"
+        }
+    },
+    {
+        "pk": 7,
+        "model": "aggregation.author",
+        "fields": {
+            "age": 25,
+            "friends": [2, 5, 6],
+            "name": "Wesley J. Chun"
+        }
+    },
+    {
+        "pk": 8,
+        "model": "aggregation.author",
+        "fields": {
+            "age": 57,
+            "friends": [9],
+            "name": "Peter Norvig"
+        }
+    },
+    {
+        "pk": 9,
+        "model": "aggregation.author",
+        "fields": {
+            "age": 46,
+            "friends": [8],
+            "name": "Stuart Russell"
+        }
+    }
+]

+ 379 - 0
tests/modeltests/aggregation/models.py

@@ -0,0 +1,379 @@
+# coding: utf-8
+from django.db import models
+
+try:
+    sorted
+except NameError:
+    from django.utils.itercompat import sorted      # For Python 2.3
+
+class Author(models.Model):
+   name = models.CharField(max_length=100)
+   age = models.IntegerField()
+   friends = models.ManyToManyField('self', blank=True)
+
+   def __unicode__(self):
+      return self.name
+
+class Publisher(models.Model):
+   name = models.CharField(max_length=300)
+   num_awards = models.IntegerField()
+
+   def __unicode__(self):
+      return self.name
+
+class Book(models.Model):
+   isbn = models.CharField(max_length=9)
+   name = models.CharField(max_length=300)
+   pages = models.IntegerField()
+   rating = models.FloatField()
+   price = models.DecimalField(decimal_places=2, max_digits=6)
+   authors = models.ManyToManyField(Author)
+   publisher = models.ForeignKey(Publisher)
+   pubdate = models.DateField()
+
+   def __unicode__(self):
+      return self.name
+
+class Store(models.Model):
+   name = models.CharField(max_length=300)
+   books = models.ManyToManyField(Book)
+   original_opening = models.DateTimeField()
+   friday_night_closing = models.TimeField()
+
+   def __unicode__(self):
+      return self.name
+
+class Entries(models.Model):
+   EntryID = models.AutoField(primary_key=True, db_column='Entry ID')
+   Entry = models.CharField(unique=True, max_length=50)
+   Exclude = models.BooleanField()
+
+class Clues(models.Model):
+   ID = models.AutoField(primary_key=True)
+   EntryID = models.ForeignKey(Entries, verbose_name='Entry', db_column = 'Entry ID')
+   Clue = models.CharField(max_length=150)
+
+# Tests on 'aggregate'
+# Different backends and numbers.
+__test__ = {'API_TESTS': """
+>>> from django.core import management
+>>> try:
+...     from decimal import Decimal
+... except:
+...     from django.utils._decimal import Decimal
+>>> from datetime import date
+
+# Reset the database representation of this app.
+# This will return the database to a clean initial state.
+>>> management.call_command('flush', verbosity=0, interactive=False)
+
+# Empty Call - request nothing, get nothing.
+>>> Author.objects.all().aggregate()
+{}
+
+>>> from django.db.models import Avg, Sum, Count, Max, Min
+
+# Single model aggregation
+#
+
+# Single aggregate
+# Average age of Authors
+>>> Author.objects.all().aggregate(Avg('age'))
+{'age__avg': 37.4...}
+
+# Multiple aggregates
+# Average and Sum of Author ages
+>>> Author.objects.all().aggregate(Sum('age'), Avg('age'))
+{'age__sum': 337, 'age__avg': 37.4...}
+
+# Aggregates interact with filters, and only
+# generate aggregate values for the filtered values
+# Sum of the age of those older than 29 years old
+>>> Author.objects.all().filter(age__gt=29).aggregate(Sum('age'))
+{'age__sum': 254}
+
+# Depth-1 Joins
+#
+
+# On Relationships with self
+# Average age of the friends of each author
+>>> Author.objects.all().aggregate(Avg('friends__age'))
+{'friends__age__avg': 34.07...}
+
+# On ManyToMany Relationships
+#
+
+# Forward
+# Average age of the Authors of Books with a rating of less than 4.5
+>>> Book.objects.all().filter(rating__lt=4.5).aggregate(Avg('authors__age'))
+{'authors__age__avg': 38.2...}
+
+# Backward
+# Average rating of the Books whose Author's name contains the letter 'a'
+>>> Author.objects.all().filter(name__contains='a').aggregate(Avg('book__rating'))
+{'book__rating__avg': 4.0}
+
+# On OneToMany Relationships
+#
+
+# Forward
+# Sum of the number of awards of each Book's Publisher
+>>> Book.objects.all().aggregate(Sum('publisher__num_awards'))
+{'publisher__num_awards__sum': 30}
+
+# Backward
+# Sum of the price of every Book that has a Publisher
+>>> Publisher.objects.all().aggregate(Sum('book__price'))
+{'book__price__sum': Decimal("270.27")}
+
+# Multiple Joins
+#
+
+# Forward
+>>> Store.objects.all().aggregate(Max('books__authors__age'))
+{'books__authors__age__max': 57}
+
+# Backward
+# Note that the very long default alias may be truncated
+>>> Author.objects.all().aggregate(Min('book__publisher__num_awards'))
+{'book__publisher__num_award...': 1}
+
+# Aggregate outputs can also be aliased.
+
+# Average amazon.com Book rating
+>>> Store.objects.filter(name='Amazon.com').aggregate(amazon_mean=Avg('books__rating'))
+{'amazon_mean': 4.08...}
+
+# Tests on annotate()
+
+# An empty annotate call does nothing but return the same QuerySet
+>>> Book.objects.all().annotate().order_by('pk')
+[<Book: The Definitive Guide to Django: Web Development Done Right>, <Book: Sams Teach Yourself Django in 24 Hours>, <Book: Practical Django Projects>, <Book: Python Web Development with Django>, <Book: Artificial Intelligence: A Modern Approach>, <Book: Paradigms of Artificial Intelligence Programming: Case Studies in Common Lisp>]
+
+# Annotate inserts the alias into the model object with the aggregated result
+>>> books = Book.objects.all().annotate(mean_age=Avg('authors__age'))
+>>> books.get(pk=1).name
+u'The Definitive Guide to Django: Web Development Done Right'
+
+>>> books.get(pk=1).mean_age
+34.5
+
+# On ManyToMany Relationships
+
+# Forward
+# Average age of the Authors of each book with a rating less than 4.5
+>>> books = Book.objects.all().filter(rating__lt=4.5).annotate(Avg('authors__age'))
+>>> sorted([(b.name, b.authors__age__avg) for b in books])
+[(u'Artificial Intelligence: A Modern Approach', 51.5), (u'Practical Django Projects', 29.0), (u'Python Web Development with Django', 30.3...), (u'Sams Teach Yourself Django in 24 Hours', 45.0)]
+
+# Count the number of authors of each book
+>>> books = Book.objects.annotate(num_authors=Count('authors'))
+>>> sorted([(b.name, b.num_authors) for b in books])
+[(u'Artificial Intelligence: A Modern Approach', 2), (u'Paradigms of Artificial Intelligence Programming: Case Studies in Common Lisp', 1), (u'Practical Django Projects', 1), (u'Python Web Development with Django', 3), (u'Sams Teach Yourself Django in 24 Hours', 1), (u'The Definitive Guide to Django: Web Development Done Right', 2)]
+
+# Backward
+# Average rating of the Books whose Author's names contains the letter 'a'
+>>> authors = Author.objects.all().filter(name__contains='a').annotate(Avg('book__rating'))
+>>> sorted([(a.name, a.book__rating__avg) for a in authors])
+[(u'Adrian Holovaty', 4.5), (u'Brad Dayley', 3.0), (u'Jacob Kaplan-Moss', 4.5), (u'James Bennett', 4.0), (u'Paul Bissex', 4.0), (u'Stuart Russell', 4.0)]
+
+# Count the number of books written by each author
+>>> authors = Author.objects.annotate(num_books=Count('book'))
+>>> sorted([(a.name, a.num_books) for a in authors])
+[(u'Adrian Holovaty', 1), (u'Brad Dayley', 1), (u'Jacob Kaplan-Moss', 1), (u'James Bennett', 1), (u'Jeffrey Forcier ', 1), (u'Paul Bissex', 1), (u'Peter Norvig', 2), (u'Stuart Russell', 1), (u'Wesley J. Chun', 1)]
+
+# On OneToMany Relationships
+
+# Forward
+# Annotate each book with the number of awards of each Book's Publisher
+>>> books = Book.objects.all().annotate(Sum('publisher__num_awards'))
+>>> sorted([(b.name, b.publisher__num_awards__sum) for b in books])
+[(u'Artificial Intelligence: A Modern Approach', 7), (u'Paradigms of Artificial Intelligence Programming: Case Studies in Common Lisp', 9), (u'Practical Django Projects', 3), (u'Python Web Development with Django', 7), (u'Sams Teach Yourself Django in 24 Hours', 1), (u'The Definitive Guide to Django: Web Development Done Right', 3)]
+
+# Backward
+# Annotate each publisher with the sum of the price of all books sold
+>>> publishers = Publisher.objects.all().annotate(Sum('book__price'))
+>>> sorted([(p.name, p.book__price__sum) for p in publishers])
+[(u'Apress', Decimal("59.69")), (u'Morgan Kaufmann', Decimal("75.00")), (u'Prentice Hall', Decimal("112.49")), (u'Sams', Decimal("23.09"))]
+
+# Calls to values() are not commutative over annotate().
+
+# Calling values on a queryset that has annotations returns the output
+# as a dictionary
+>>> Book.objects.filter(pk=1).annotate(mean_age=Avg('authors__age')).values()
+[{'rating': 4.5, 'isbn': u'159059725', 'name': u'The Definitive Guide to Django: Web Development Done Right', 'pubdate': datetime.date(2007, 12, 6), 'price': Decimal("30..."), 'id': 1, 'publisher_id': 1, 'pages': 447, 'mean_age': 34.5}]
+
+>>> Book.objects.filter(pk=1).annotate(mean_age=Avg('authors__age')).values('pk', 'isbn', 'mean_age')
+[{'pk': 1, 'isbn': u'159059725', 'mean_age': 34.5}]
+
+# Calling it with parameters reduces the output but does not remove the
+# annotation.
+>>> Book.objects.filter(pk=1).annotate(mean_age=Avg('authors__age')).values('name')
+[{'name': u'The Definitive Guide to Django: Web Development Done Right', 'mean_age': 34.5}]
+
+# An empty values() call before annotating has the same effect as an
+# empty values() call after annotating
+>>> Book.objects.filter(pk=1).values().annotate(mean_age=Avg('authors__age'))
+[{'rating': 4.5, 'isbn': u'159059725', 'name': u'The Definitive Guide to Django: Web Development Done Right', 'pubdate': datetime.date(2007, 12, 6), 'price': Decimal("30..."), 'id': 1, 'publisher_id': 1, 'pages': 447, 'mean_age': 34.5}]
+
+# Calling annotate() on a ValuesQuerySet annotates over the groups of
+# fields to be selected by the ValuesQuerySet.
+
+# Note that an extra parameter is added to each dictionary. This
+# parameter is a queryset representing the objects that have been
+# grouped to generate the annotation
+
+>>> Book.objects.all().values('rating').annotate(n_authors=Count('authors__id'), mean_age=Avg('authors__age')).order_by('rating')
+[{'rating': 3.0, 'n_authors': 1, 'mean_age': 45.0}, {'rating': 4.0, 'n_authors': 6, 'mean_age': 37.1...}, {'rating': 4.5, 'n_authors': 2, 'mean_age': 34.5}, {'rating': 5.0, 'n_authors': 1, 'mean_age': 57.0}]
+
+# If a join doesn't match any objects, an aggregate returns None
+>>> authors = Author.objects.all().annotate(Avg('friends__age')).order_by('id')
+>>> len(authors)
+9
+>>> sorted([(a.name, a.friends__age__avg) for a in authors])
+[(u'Adrian Holovaty', 32.0), (u'Brad Dayley', None), (u'Jacob Kaplan-Moss', 29.5), (u'James Bennett', 34.0), (u'Jeffrey Forcier ', 27.0), (u'Paul Bissex', 31.0), (u'Peter Norvig', 46.0), (u'Stuart Russell', 57.0), (u'Wesley J. Chun', 33.6...)]
+
+
+# The Count aggregation function allows an extra parameter: distinct.
+# This restricts the count results to unique items
+>>> Book.objects.all().aggregate(Count('rating'))
+{'rating__count': 6}
+
+>>> Book.objects.all().aggregate(Count('rating', distinct=True))
+{'rating__count': 4}
+
+# Retrieving the grouped objects
+
+# When using Count you can also omit the primary key and refer only to
+# the related field name if you want to count all the related objects
+# and not a specific column
+>>> explicit = list(Author.objects.annotate(Count('book__id')))
+>>> implicit = list(Author.objects.annotate(Count('book')))
+>>> explicit == implicit
+True
+
+# Ordering is allowed on aggregates
+>>> Book.objects.values('rating').annotate(oldest=Max('authors__age')).order_by('oldest', 'rating')
+[{'rating': 4.5, 'oldest': 35}, {'rating': 3.0, 'oldest': 45}, {'rating': 4.0, 'oldest': 57}, {'rating': 5.0, 'oldest': 57}]
+
+>>> Book.objects.values('rating').annotate(oldest=Max('authors__age')).order_by('-oldest', '-rating')
+[{'rating': 5.0, 'oldest': 57}, {'rating': 4.0, 'oldest': 57}, {'rating': 3.0, 'oldest': 45}, {'rating': 4.5, 'oldest': 35}]
+
+# It is possible to aggregate over annotated values
+>>> Book.objects.all().annotate(num_authors=Count('authors__id')).aggregate(Avg('num_authors'))
+{'num_authors__avg': 1.66...}
+
+# You can filter the results based on the aggregation alias.
+
+# Lets add a publisher to test the different possibilities for filtering
+>>> p = Publisher(name='Expensive Publisher', num_awards=0)
+>>> p.save()
+>>> Book(name='ExpensiveBook1', pages=1, isbn='111', rating=3.5, price=Decimal("1000"), publisher=p, pubdate=date(2008,12,1)).save()
+>>> Book(name='ExpensiveBook2', pages=1, isbn='222', rating=4.0, price=Decimal("1000"), publisher=p, pubdate=date(2008,12,2)).save()
+>>> Book(name='ExpensiveBook3', pages=1, isbn='333', rating=4.5, price=Decimal("35"), publisher=p, pubdate=date(2008,12,3)).save()
+
+# Publishers that have:
+
+# (i) more than one book
+>>> Publisher.objects.annotate(num_books=Count('book__id')).filter(num_books__gt=1).order_by('pk')
+[<Publisher: Apress>, <Publisher: Prentice Hall>, <Publisher: Expensive Publisher>]
+
+# (ii) a book that cost less than 40
+>>> Publisher.objects.filter(book__price__lt=Decimal("40.0")).order_by('pk')
+[<Publisher: Apress>, <Publisher: Apress>, <Publisher: Sams>, <Publisher: Prentice Hall>, <Publisher: Expensive Publisher>]
+
+# (iii) more than one book and (at least) a book that cost less than 40
+>>> Publisher.objects.annotate(num_books=Count('book__id')).filter(num_books__gt=1, book__price__lt=Decimal("40.0")).order_by('pk')
+[<Publisher: Apress>, <Publisher: Prentice Hall>, <Publisher: Expensive Publisher>]
+
+# (iv) more than one book that costs less than $40
+>>> Publisher.objects.filter(book__price__lt=Decimal("40.0")).annotate(num_books=Count('book__id')).filter(num_books__gt=1).order_by('pk')
+[<Publisher: Apress>]
+
+# Now a bit of testing on the different lookup types
+#
+
+>>> Publisher.objects.annotate(num_books=Count('book')).filter(num_books__range=[1, 3]).order_by('pk')
+[<Publisher: Apress>, <Publisher: Sams>, <Publisher: Prentice Hall>, <Publisher: Morgan Kaufmann>, <Publisher: Expensive Publisher>]
+
+>>> Publisher.objects.annotate(num_books=Count('book')).filter(num_books__range=[1, 2]).order_by('pk')
+[<Publisher: Apress>, <Publisher: Sams>, <Publisher: Prentice Hall>, <Publisher: Morgan Kaufmann>]
+
+>>> Publisher.objects.annotate(num_books=Count('book')).filter(num_books__in=[1, 3]).order_by('pk')
+[<Publisher: Sams>, <Publisher: Morgan Kaufmann>, <Publisher: Expensive Publisher>]
+
+>>> Publisher.objects.annotate(num_books=Count('book')).filter(num_books__isnull=True)
+[]
+
+>>> p.delete()
+
+# Does Author X have any friends? (or better, how many friends does author X have)
+# NOTE(review): the next line uses a two-character '>>' prompt, so doctest skips
+# it; Count returns an int, so the expected output would be 2, not 2.0 — confirm
+# before enabling.
+>> Author.objects.filter(pk=1).aggregate(Count('friends__id'))
+{'friends__id__count': 2.0}
+
+# Give me a list of all Books with more than 1 authors
+>>> Book.objects.all().annotate(num_authors=Count('authors__name')).filter(num_authors__gte=2).order_by('pk')
+[<Book: The Definitive Guide to Django: Web Development Done Right>, <Book: Artificial Intelligence: A Modern Approach>]
+
+# Give me a list of all Authors that have no friends
+>>> Author.objects.all().annotate(num_friends=Count('friends__id', distinct=True)).filter(num_friends=0).order_by('pk')
+[<Author: Brad Dayley>]
+
+# Give me a list of all publishers that have published more than 1 books
+>>> Publisher.objects.all().annotate(num_books=Count('book__id')).filter(num_books__gt=1).order_by('pk')
+[<Publisher: Apress>, <Publisher: Prentice Hall>]
+
+# Give me a list of all publishers that have published more than 1 books that cost less than 40
+>>> Publisher.objects.all().filter(book__price__lt=Decimal("40.0")).annotate(num_books=Count('book__id')).filter(num_books__gt=1)
+[<Publisher: Apress>]
+
+# Give me a list of all Books that were written by X and one other author.
+>>> Book.objects.all().annotate(num_authors=Count('authors__id')).filter(authors__name__contains='Norvig', num_authors__gt=1)
+[<Book: Artificial Intelligence: A Modern Approach>]
+
+# Give me the average rating of all Books that were written by X and one other author.
+# (Aggregate over objects discovered using membership of the m2m set)
+
+# Adding an existing author to another book to test it the right way
+>>> a = Author.objects.get(name__contains='Norvig')
+>>> b = Book.objects.get(name__contains='Done Right')
+>>> b.authors.add(a)
+>>> b.save()
+
+# This should do it
+>>> Book.objects.all().annotate(num_authors=Count('authors__id')).filter(authors__name__contains='Norvig', num_authors__gt=1).aggregate(Avg('rating'))
+{'rating__avg': 4.25}
+>>> b.authors.remove(a)
+
+# Give me a list of all Authors that have published a book with at least one other person
+# (Filters over a count generated on a related object)
+#
+# Cheating: [a for a in Author.objects.all().annotate(num_colleagues=Count('book__authors__id'), num_books=Count('book__id', distinct=True)) if a.num_colleagues - a.num_books > 0]
+# F-Syntax is required. Will be fixed after F objects are available
+
+# Tests on fields with non-default table and column names.
+>>> Clues.objects.values('EntryID__Entry').annotate(Appearances=Count('EntryID'), Distinct_Clues=Count('Clue', distinct=True))
+[]
+
+# Aggregates also work on dates, times and datetimes
+>>> Publisher.objects.annotate(earliest_book=Min('book__pubdate')).order_by('earliest_book').values()
+[{'earliest_book': datetime.date(1991, 10, 15), 'num_awards': 9, 'id': 4, 'name': u'Morgan Kaufmann'}, {'earliest_book': datetime.date(1995, 1, 15), 'num_awards': 7, 'id': 3, 'name': u'Prentice Hall'}, {'earliest_book': datetime.date(2007, 12, 6), 'num_awards': 3, 'id': 1, 'name': u'Apress'}, {'earliest_book': datetime.date(2008, 3, 3), 'num_awards': 1, 'id': 2, 'name': u'Sams'}]
+
+>>> Store.objects.aggregate(Max('friday_night_closing'), Min("original_opening"))
+{'friday_night_closing__max': datetime.time(23, 59, 59), 'original_opening__min': datetime.datetime(1945, 4, 25, 16, 24, 14)}
+
+# values_list() can also be used
+
+>>> Book.objects.filter(pk=1).annotate(mean_age=Avg('authors__age')).values_list('pk', 'isbn', 'mean_age')
+[(1, u'159059725', 34.5)]
+
+>>> Book.objects.filter(pk=1).annotate(mean_age=Avg('authors__age')).values_list('isbn')
+[(u'159059725',)]
+
+>>> Book.objects.filter(pk=1).annotate(mean_age=Avg('authors__age')).values_list('mean_age')
+[(34.5,)]
+
+>>> Book.objects.filter(pk=1).annotate(mean_age=Avg('authors__age')).values_list('mean_age', flat=True)
+[34.5]
+
+"""}

+ 0 - 0
tests/regressiontests/aggregation_regress/__init__.py


+ 229 - 0
tests/regressiontests/aggregation_regress/fixtures/initial_data.json

@@ -0,0 +1,229 @@
+[
+    {
+        "pk": 1,
+        "model": "aggregation_regress.publisher",
+        "fields": {
+            "name": "Apress",
+            "num_awards": 3
+        }
+    },
+    {
+        "pk": 2,
+        "model": "aggregation_regress.publisher",
+        "fields": {
+            "name": "Sams",
+            "num_awards": 1
+        }
+    },
+    {
+        "pk": 3,
+        "model": "aggregation_regress.publisher",
+        "fields": {
+            "name": "Prentice Hall",
+            "num_awards": 7
+        }
+    },
+    {
+        "pk": 4,
+        "model": "aggregation_regress.publisher",
+        "fields": {
+            "name": "Morgan Kaufmann",
+            "num_awards": 9
+        }
+    },
+    {
+        "pk": 1,
+        "model": "aggregation_regress.book",
+        "fields": {
+            "publisher": 1,
+            "isbn": "159059725",
+            "name": "The Definitive Guide to Django: Web Development Done Right",
+            "price": "30.00",
+            "rating": 4.5,
+            "authors": [1, 2],
+            "pages": 447,
+            "pubdate": "2007-12-6"
+        }
+    },
+    {
+        "pk": 2,
+        "model": "aggregation_regress.book",
+        "fields": {
+            "publisher": 2,
+            "isbn": "067232959",
+            "name": "Sams Teach Yourself Django in 24 Hours",
+            "price": "23.09",
+            "rating": 3.0,
+            "authors": [3],
+            "pages": 528,
+            "pubdate": "2008-3-3"
+        }
+    },
+    {
+        "pk": 3,
+        "model": "aggregation_regress.book",
+        "fields": {
+            "publisher": 1,
+            "isbn": "159059996",
+            "name": "Practical Django Projects",
+            "price": "29.69",
+            "rating": 4.0,
+            "authors": [4],
+            "pages": 300,
+            "pubdate": "2008-6-23"
+        }
+    },
+    {
+        "pk": 4,
+        "model": "aggregation_regress.book",
+        "fields": {
+            "publisher": 3,
+            "isbn": "013235613",
+            "name": "Python Web Development with Django",
+            "price": "29.69",
+            "rating": 4.0,
+            "authors": [5, 6, 7],
+            "pages": 350,
+            "pubdate": "2008-11-3"
+        }
+    },
+    {
+        "pk": 5,
+        "model": "aggregation_regress.book",
+        "fields": {
+            "publisher": 3,
+            "isbn": "013790395",
+            "name": "Artificial Intelligence: A Modern Approach",
+            "price": "82.80",
+            "rating": 4.0,
+            "authors": [8, 9],
+            "pages": 1132,
+            "pubdate": "1995-1-15"
+        }
+    },
+    {
+        "pk": 6,
+        "model": "aggregation_regress.book",
+        "fields": {
+            "publisher": 4,
+            "isbn": "155860191",
+            "name": "Paradigms of Artificial Intelligence Programming: Case Studies in Common Lisp",
+            "price": "75.00",
+            "rating": 5.0,
+            "authors": [8],
+            "pages": 946,
+            "pubdate": "1991-10-15"
+        }
+    },
+    {
+        "pk": 1,
+        "model": "aggregation_regress.store",
+        "fields": {
+            "books": [1, 2, 3, 4, 5, 6],
+            "name": "Amazon.com",
+            "original_opening": "1994-4-23 9:17:42",
+            "friday_night_closing": "23:59:59"
+        }
+    },
+    {
+        "pk": 2,
+        "model": "aggregation_regress.store",
+        "fields": {
+            "books": [1, 3, 5, 6],
+            "name": "Books.com",
+            "original_opening": "2001-3-15 11:23:37",
+            "friday_night_closing": "23:59:59"
+        }
+    },
+    {
+        "pk": 3,
+        "model": "aggregation_regress.store",
+        "fields": {
+            "books": [3, 4, 6],
+            "name": "Mamma and Pappa's Books",
+            "original_opening": "1945-4-25 16:24:14",
+            "friday_night_closing": "21:30:00"
+        }
+    },
+    {
+        "pk": 1,
+        "model": "aggregation_regress.author",
+        "fields": {
+            "age": 34,
+            "friends": [2, 4],
+            "name": "Adrian Holovaty"
+        }
+    },
+    {
+        "pk": 2,
+        "model": "aggregation_regress.author",
+        "fields": {
+            "age": 35,
+            "friends": [1, 7],
+            "name": "Jacob Kaplan-Moss"
+        }
+    },
+    {
+        "pk": 3,
+        "model": "aggregation_regress.author",
+        "fields": {
+            "age": 45,
+            "friends": [],
+            "name": "Brad Dayley"
+        }
+    },
+    {
+        "pk": 4,
+        "model": "aggregation_regress.author",
+        "fields": {
+            "age": 29,
+            "friends": [1],
+            "name": "James Bennett"
+        }
+    },
+    {
+        "pk": 5,
+        "model": "aggregation_regress.author",
+        "fields": {
+            "age": 37,
+            "friends": [6, 7],
+            "name": "Jeffrey Forcier "
+        }
+    },
+    {
+        "pk": 6,
+        "model": "aggregation_regress.author",
+        "fields": {
+            "age": 29,
+            "friends": [5, 7],
+            "name": "Paul Bissex"
+        }
+    },
+    {
+        "pk": 7,
+        "model": "aggregation_regress.author",
+        "fields": {
+            "age": 25,
+            "friends": [2, 5, 6],
+            "name": "Wesley J. Chun"
+        }
+    },
+    {
+        "pk": 8,
+        "model": "aggregation_regress.author",
+        "fields": {
+            "age": 57,
+            "friends": [9],
+            "name": "Peter Norvig"
+        }
+    },
+    {
+        "pk": 9,
+        "model": "aggregation_regress.author",
+        "fields": {
+            "age": 46,
+            "friends": [8],
+            "name": "Stuart Russell"
+        }
+    }
+]

+ 199 - 0
tests/regressiontests/aggregation_regress/models.py

@@ -0,0 +1,199 @@
+# coding: utf-8
+from django.db import models
+from django.conf import settings
+
+try:
+    sorted
+except NameError:
+    from django.utils.itercompat import sorted      # For Python 2.3
+
+class Author(models.Model):
+   name = models.CharField(max_length=100)
+   age = models.IntegerField()
+   friends = models.ManyToManyField('self', blank=True)
+
+   def __unicode__(self):
+      return self.name
+
+class Publisher(models.Model):
+   name = models.CharField(max_length=300)
+   num_awards = models.IntegerField()
+
+   def __unicode__(self):
+      return self.name
+
+class Book(models.Model):
+   isbn = models.CharField(max_length=9)
+   name = models.CharField(max_length=300)
+   pages = models.IntegerField()
+   rating = models.FloatField()
+   price = models.DecimalField(decimal_places=2, max_digits=6)
+   authors = models.ManyToManyField(Author)
+   publisher = models.ForeignKey(Publisher)
+   pubdate = models.DateField()
+
+   class Meta:
+       ordering = ('name',)
+
+   def __unicode__(self):
+      return self.name
+
+class Store(models.Model):
+   name = models.CharField(max_length=300)
+   books = models.ManyToManyField(Book)
+   original_opening = models.DateTimeField()
+   friday_night_closing = models.TimeField()
+
+   def __unicode__(self):
+      return self.name
+
+# Extra does not play well with values. Modify the tests if/when this is fixed.
+__test__ = {'API_TESTS': """
+>>> from django.core import management
+>>> from django.db.models import get_app
+
+# Reset the database representation of this app.
+# This will return the database to a clean initial state.
+>>> management.call_command('flush', verbosity=0, interactive=False)
+
+>>> from django.db.models import Avg, Sum, Count, Max, Min, StdDev, Variance
+
+# Ordering requests are ignored
+>>> Author.objects.all().order_by('name').aggregate(Avg('age'))
+{'age__avg': 37.4...}
+
+# Implicit ordering is also ignored
+>>> Book.objects.all().aggregate(Sum('pages'))
+{'pages__sum': 3703}
+
+# Baseline results
+>>> Book.objects.all().aggregate(Sum('pages'), Avg('pages'))
+{'pages__sum': 3703, 'pages__avg': 617.1...}
+
+# Empty values query doesn't affect grouping or results
+>>> Book.objects.all().values().aggregate(Sum('pages'), Avg('pages'))
+{'pages__sum': 3703, 'pages__avg': 617.1...}
+
+# Aggregate overrides extra selected column
+>>> Book.objects.all().extra(select={'price_per_page' : 'price / pages'}).aggregate(Sum('pages'))
+{'pages__sum': 3703}
+
+# Annotations get combined with extra select clauses
+>>> sorted(Book.objects.all().annotate(mean_auth_age=Avg('authors__age')).extra(select={'manufacture_cost' : 'price * .5'}).get(pk=2).__dict__.items())
+[('id', 2), ('isbn', u'067232959'), ('manufacture_cost', ...11.545...), ('mean_auth_age', 45.0), ('name', u'Sams Teach Yourself Django in 24 Hours'), ('pages', 528), ('price', Decimal("23.09")), ('pubdate', datetime.date(2008, 3, 3)), ('publisher_id', 2), ('rating', 3.0)]
+
+# Order of the annotate/extra in the query doesn't matter
+>>> sorted(Book.objects.all().extra(select={'manufacture_cost' : 'price * .5'}).annotate(mean_auth_age=Avg('authors__age')).get(pk=2).__dict__.items())
+[('id', 2), ('isbn', u'067232959'), ('manufacture_cost', ...11.545...), ('mean_auth_age', 45.0), ('name', u'Sams Teach Yourself Django in 24 Hours'), ('pages', 528), ('price', Decimal("23.09")), ('pubdate', datetime.date(2008, 3, 3)), ('publisher_id', 2), ('rating', 3.0)]
+
+# Values queries can be combined with annotate and extra
+>>> sorted(Book.objects.all().annotate(mean_auth_age=Avg('authors__age')).extra(select={'manufacture_cost' : 'price * .5'}).values().get(pk=2).items())
+[('id', 2), ('isbn', u'067232959'), ('manufacture_cost', ...11.545...), ('mean_auth_age', 45.0), ('name', u'Sams Teach Yourself Django in 24 Hours'), ('pages', 528), ('price', Decimal("23.09")), ('pubdate', datetime.date(2008, 3, 3)), ('publisher_id', 2), ('rating', 3.0)]
+
+# The order of the values, annotate and extra clauses doesn't matter
+>>> sorted(Book.objects.all().values().annotate(mean_auth_age=Avg('authors__age')).extra(select={'manufacture_cost' : 'price * .5'}).get(pk=2).items())
+[('id', 2), ('isbn', u'067232959'), ('manufacture_cost', ...11.545...), ('mean_auth_age', 45.0), ('name', u'Sams Teach Yourself Django in 24 Hours'), ('pages', 528), ('price', Decimal("23.09")), ('pubdate', datetime.date(2008, 3, 3)), ('publisher_id', 2), ('rating', 3.0)]
+
+# A values query that selects specific columns reduces the output
+>>> sorted(Book.objects.all().annotate(mean_auth_age=Avg('authors__age')).extra(select={'price_per_page' : 'price / pages'}).values('name').get(pk=1).items())
+[('mean_auth_age', 34.5), ('name', u'The Definitive Guide to Django: Web Development Done Right')]
+
+# The annotations are added to values output if values() precedes annotate()
+>>> sorted(Book.objects.all().values('name').annotate(mean_auth_age=Avg('authors__age')).extra(select={'price_per_page' : 'price / pages'}).get(pk=1).items())
+[('mean_auth_age', 34.5), ('name', u'The Definitive Guide to Django: Web Development Done Right')]
+
+# Check that all of the objects are getting counted (allow_nulls) and that values respects the amount of objects
+>>> len(Author.objects.all().annotate(Avg('friends__age')).values())
+9
+
+# Check that consecutive calls to annotate accumulate in the query
+>>> Book.objects.values('price').annotate(oldest=Max('authors__age')).order_by('oldest', 'price').annotate(Max('publisher__num_awards'))
+[{'price': Decimal("30..."), 'oldest': 35, 'publisher__num_awards__max': 3}, {'price': Decimal("29.69"), 'oldest': 37, 'publisher__num_awards__max': 7}, {'price': Decimal("23.09"), 'oldest': 45, 'publisher__num_awards__max': 1}, {'price': Decimal("75..."), 'oldest': 57, 'publisher__num_awards__max': 9}, {'price': Decimal("82.8..."), 'oldest': 57, 'publisher__num_awards__max': 7}]
+
+# Aggregates can be composed over annotations.
+# The return type is derived from the composed aggregate
+>>> Book.objects.all().annotate(num_authors=Count('authors__id')).aggregate(Max('pages'), Max('price'), Sum('num_authors'), Avg('num_authors'))
+{'num_authors__sum': 10, 'num_authors__avg': 1.66..., 'pages__max': 1132, 'price__max': Decimal("82.80")}
+
+# Bad field requests in aggregates are caught and reported
+>>> Book.objects.all().aggregate(num_authors=Count('foo'))
+Traceback (most recent call last):
+...
+FieldError: Cannot resolve keyword 'foo' into field. Choices are: authors, id, isbn, name, pages, price, pubdate, publisher, rating, store
+
+>>> Book.objects.all().annotate(num_authors=Count('foo'))
+Traceback (most recent call last):
+...
+FieldError: Cannot resolve keyword 'foo' into field. Choices are: authors, id, isbn, name, pages, price, pubdate, publisher, rating, store
+
+>>> Book.objects.all().annotate(num_authors=Count('authors__id')).aggregate(Max('foo'))
+Traceback (most recent call last):
+...
+FieldError: Cannot resolve keyword 'foo' into field. Choices are: authors, id, isbn, name, pages, price, pubdate, publisher, rating, store, num_authors
+
+# Old-style count aggregations can be mixed with new-style
+>>> Book.objects.annotate(num_authors=Count('authors')).count()
+6
+
+# Non-ordinal, non-computed Aggregates over annotations correctly inherit
+# the annotation's internal type if the annotation is ordinal or computed
+>>> Book.objects.annotate(num_authors=Count('authors')).aggregate(Max('num_authors'))
+{'num_authors__max': 3}
+
+>>> Publisher.objects.annotate(avg_price=Avg('book__price')).aggregate(Max('avg_price'))
+{'avg_price__max': 75.0...}
+
+# Aliases are quoted to protect aliases that might be reserved names
+>>> Book.objects.aggregate(number=Max('pages'), select=Max('pages'))
+{'number': 1132, 'select': 1132}
+
+
+"""
+}
+
+if settings.DATABASE_ENGINE != 'sqlite3':
+    __test__['API_TESTS'] += """
+# Stddev and Variance are not guaranteed to be available for SQLite.
+
+>>> Book.objects.aggregate(StdDev('pages'))
+{'pages__stddev': 311.46...}
+
+>>> Book.objects.aggregate(StdDev('rating'))
+{'rating__stddev': 0.60...}
+
+>>> Book.objects.aggregate(StdDev('price'))
+{'price__stddev': 24.16...}
+
+
+>>> Book.objects.aggregate(StdDev('pages', sample=True))
+{'pages__stddev': 341.19...}
+
+>>> Book.objects.aggregate(StdDev('rating', sample=True))
+{'rating__stddev': 0.66...}
+
+>>> Book.objects.aggregate(StdDev('price', sample=True))
+{'price__stddev': 26.46...}
+
+
+>>> Book.objects.aggregate(Variance('pages'))
+{'pages__variance': 97010.80...}
+
+>>> Book.objects.aggregate(Variance('rating'))
+{'rating__variance': 0.36...}
+
+>>> Book.objects.aggregate(Variance('price'))
+{'price__variance': 583.77...}
+
+
+>>> Book.objects.aggregate(Variance('pages', sample=True))
+{'pages__variance': 116412.96...}
+
+>>> Book.objects.aggregate(Variance('rating', sample=True))
+{'rating__variance': 0.44...}
+
+>>> Book.objects.aggregate(Variance('price', sample=True))
+{'price__variance': 700.53...}
+
+
+"""
+