Browse Source

Fixed #3566 -- Added support for aggregation to the ORM. See the documentation for details on usage.

Many thanks to:
 * Nicolas Lara, who worked on this feature during the 2008 Google Summer of Code.
 * Alex Gaynor for his help debugging and fixing a number of issues.
 * Justin Bronn for his help integrating with contrib.gis.
 * Karen Tracey for her help with cross-platform testing.
 * Ian Kelly for his help testing and fixing Oracle support.
 * Malcolm Tredinnick for his invaluable review notes.

git-svn-id: http://code.djangoproject.com/svn/django/trunk@9742 bcc190cf-cafb-0310-a4f2-bffc1f526a37
Russell Keith-Magee 16 years ago
parent
commit
cc4e4d9aee

+ 1 - 0
AUTHORS

@@ -31,6 +31,7 @@ answer newbie questions, and generally made Django that much better:
     AgarFu <heaven@croasanaso.sytes.net>
     AgarFu <heaven@croasanaso.sytes.net>
     Dagur Páll Ammendrup <dagurp@gmail.com>
     Dagur Páll Ammendrup <dagurp@gmail.com>
     Collin Anderson <cmawebsite@gmail.com>
     Collin Anderson <cmawebsite@gmail.com>
+    Nicolas Lara <nicolaslara@gmail.com>
     Jeff Anderson <jefferya@programmerq.net>
     Jeff Anderson <jefferya@programmerq.net>
     Marian Andre <django@andre.sk>
     Marian Andre <django@andre.sk>
     Andreas
     Andreas

+ 10 - 0
django/contrib/gis/db/models/aggregates.py

@@ -0,0 +1,10 @@
+from django.db.models import Aggregate
+
+class Extent(Aggregate):
+    name = 'Extent'
+
+class MakeLine(Aggregate):
+    name = 'MakeLine'
+
+class Union(Aggregate):
+    name = 'Union'

+ 68 - 121
django/contrib/gis/db/models/query.py

@@ -3,6 +3,7 @@ from django.db import connection
 from django.db.models.query import sql, QuerySet, Q
 from django.db.models.query import sql, QuerySet, Q
 
 
 from django.contrib.gis.db.backend import SpatialBackend
 from django.contrib.gis.db.backend import SpatialBackend
+from django.contrib.gis.db.models import aggregates
 from django.contrib.gis.db.models.fields import GeometryField, PointField
 from django.contrib.gis.db.models.fields import GeometryField, PointField
 from django.contrib.gis.db.models.sql import AreaField, DistanceField, GeomField, GeoQuery, GeoWhereNode
 from django.contrib.gis.db.models.sql import AreaField, DistanceField, GeomField, GeoQuery, GeoWhereNode
 from django.contrib.gis.measure import Area, Distance
 from django.contrib.gis.measure import Area, Distance
@@ -17,7 +18,7 @@ class GeomSQL(object):
     "Simple wrapper object for geometric SQL."
     "Simple wrapper object for geometric SQL."
     def __init__(self, geo_sql):
     def __init__(self, geo_sql):
         self.sql = geo_sql
         self.sql = geo_sql
-    
+
     def as_sql(self, *args, **kwargs):
     def as_sql(self, *args, **kwargs):
         return self.sql
         return self.sql
 
 
@@ -30,7 +31,7 @@ class GeoQuerySet(QuerySet):
 
 
     def area(self, tolerance=0.05, **kwargs):
     def area(self, tolerance=0.05, **kwargs):
         """
         """
-        Returns the area of the geographic field in an `area` attribute on 
+        Returns the area of the geographic field in an `area` attribute on
         each element of this GeoQuerySet.
         each element of this GeoQuerySet.
         """
         """
         # Peforming setup here rather than in `_spatial_attribute` so that
         # Peforming setup here rather than in `_spatial_attribute` so that
@@ -75,21 +76,21 @@ class GeoQuerySet(QuerySet):
 
 
         Keyword Arguments:
         Keyword Arguments:
          `spheroid`  => If the geometry field is geodetic and PostGIS is
          `spheroid`  => If the geometry field is geodetic and PostGIS is
-                        the spatial database, then the more accurate 
+                        the spatial database, then the more accurate
                         spheroid calculation will be used instead of the
                         spheroid calculation will be used instead of the
                         quicker sphere calculation.
                         quicker sphere calculation.
-                        
-         `tolerance` => Used only for Oracle. The tolerance is 
-                        in meters -- a default of 5 centimeters (0.05) 
+
+         `tolerance` => Used only for Oracle. The tolerance is
+                        in meters -- a default of 5 centimeters (0.05)
                         is used.
                         is used.
         """
         """
         return self._distance_attribute('distance', geom, **kwargs)
         return self._distance_attribute('distance', geom, **kwargs)
 
 
     def envelope(self, **kwargs):
     def envelope(self, **kwargs):
         """
         """
-        Returns a Geometry representing the bounding box of the 
+        Returns a Geometry representing the bounding box of the
         Geometry field in an `envelope` attribute on each element of
         Geometry field in an `envelope` attribute on each element of
-        the GeoQuerySet. 
+        the GeoQuerySet.
         """
         """
         return self._geom_attribute('envelope', **kwargs)
         return self._geom_attribute('envelope', **kwargs)
 
 
@@ -98,20 +99,7 @@ class GeoQuerySet(QuerySet):
         Returns the extent (aggregate) of the features in the GeoQuerySet.  The
         Returns the extent (aggregate) of the features in the GeoQuerySet.  The
         extent will be returned as a 4-tuple, consisting of (xmin, ymin, xmax, ymax).
         extent will be returned as a 4-tuple, consisting of (xmin, ymin, xmax, ymax).
         """
         """
-        convert_extent = None
-        if SpatialBackend.postgis:
-            def convert_extent(box, geo_field):
-                # TODO: Parsing of BOX3D, Oracle support (patches welcome!)
-                # Box text will be something like "BOX(-90.0 30.0, -85.0 40.0)"; 
-                # parsing out and returning as a 4-tuple.
-                ll, ur = box[4:-1].split(',')
-                xmin, ymin = map(float, ll.split())
-                xmax, ymax = map(float, ur.split())
-                return (xmin, ymin, xmax, ymax)
-        elif SpatialBackend.oracle:
-            def convert_extent(wkt, geo_field):
-                raise NotImplementedError
-        return self._spatial_aggregate('extent', convert_func=convert_extent, **kwargs)
+        return self._spatial_aggregate(aggregates.Extent, **kwargs)
 
 
     def gml(self, precision=8, version=2, **kwargs):
     def gml(self, precision=8, version=2, **kwargs):
         """
         """
@@ -120,7 +108,7 @@ class GeoQuerySet(QuerySet):
         """
         """
         s = {'desc' : 'GML', 'procedure_args' : {'precision' : precision}}
         s = {'desc' : 'GML', 'procedure_args' : {'precision' : precision}}
         if SpatialBackend.postgis:
         if SpatialBackend.postgis:
-            # PostGIS AsGML() aggregate function parameter order depends on the 
+            # PostGIS AsGML() aggregate function parameter order depends on the
             # version -- uggh.
             # version -- uggh.
             major, minor1, minor2 = SpatialBackend.version
             major, minor1, minor2 = SpatialBackend.version
             if major >= 1 and (minor1 > 3 or (minor1 == 3 and minor2 > 1)):
             if major >= 1 and (minor1 > 3 or (minor1 == 3 and minor2 > 1)):
@@ -163,9 +151,7 @@ class GeoQuerySet(QuerySet):
         this GeoQuerySet and returns it.  This is a spatial aggregate
         this GeoQuerySet and returns it.  This is a spatial aggregate
         method, and thus returns a geometry rather than a GeoQuerySet.
         method, and thus returns a geometry rather than a GeoQuerySet.
         """
         """
-        kwargs['geo_field_type'] = PointField
-        kwargs['agg_field'] = GeometryField
-        return self._spatial_aggregate('make_line', **kwargs)
+        return self._spatial_aggregate(aggregates.MakeLine, geo_field_type=PointField, **kwargs)
 
 
     def mem_size(self, **kwargs):
     def mem_size(self, **kwargs):
         """
         """
@@ -185,7 +171,7 @@ class GeoQuerySet(QuerySet):
 
 
     def num_points(self, **kwargs):
     def num_points(self, **kwargs):
         """
         """
-        Returns the number of points in the first linestring in the 
+        Returns the number of points in the first linestring in the
         Geometry field in a `num_points` attribute on each element of
         Geometry field in a `num_points` attribute on each element of
         this GeoQuerySet; otherwise sets with None.
         this GeoQuerySet; otherwise sets with None.
         """
         """
@@ -231,7 +217,7 @@ class GeoQuerySet(QuerySet):
 
 
     def sym_difference(self, geom, **kwargs):
     def sym_difference(self, geom, **kwargs):
         """
         """
-        Returns the symmetric difference of the geographic field in a 
+        Returns the symmetric difference of the geographic field in a
         `sym_difference` attribute on each element of this GeoQuerySet.
         `sym_difference` attribute on each element of this GeoQuerySet.
         """
         """
         return self._geomset_attribute('sym_difference', geom, **kwargs)
         return self._geomset_attribute('sym_difference', geom, **kwargs)
@@ -265,7 +251,7 @@ class GeoQuerySet(QuerySet):
         # when there's also a transformation we need to cascade the substitutions.
         # when there's also a transformation we need to cascade the substitutions.
         # For example, 'SDO_UTIL.TO_WKTGEOMETRY(SDO_CS.TRANSFORM( ... )'
         # For example, 'SDO_UTIL.TO_WKTGEOMETRY(SDO_CS.TRANSFORM( ... )'
         geo_col = self.query.custom_select.get(geo_field, field_col)
         geo_col = self.query.custom_select.get(geo_field, field_col)
-        
+
         # Setting the key for the field's column with the custom SELECT SQL to
         # Setting the key for the field's column with the custom SELECT SQL to
         # override the geometry column returned from the database.
         # override the geometry column returned from the database.
         custom_sel = '%s(%s, %s)' % (SpatialBackend.transform, geo_col, srid)
         custom_sel = '%s(%s, %s)' % (SpatialBackend.transform, geo_col, srid)
@@ -288,11 +274,10 @@ class GeoQuerySet(QuerySet):
         None if the GeoQuerySet is empty.  The `tolerance` keyword is for
         None if the GeoQuerySet is empty.  The `tolerance` keyword is for
         Oracle backends only.
         Oracle backends only.
         """
         """
-        kwargs['agg_field'] = GeometryField
-        return self._spatial_aggregate('unionagg', **kwargs)
+        return self._spatial_aggregate(aggregates.Union, **kwargs)
 
 
     ### Private API -- Abstracted DRY routines. ###
     ### Private API -- Abstracted DRY routines. ###
-    def _spatial_setup(self, att, aggregate=False, desc=None, field_name=None, geo_field_type=None):
+    def _spatial_setup(self, att, desc=None, field_name=None, geo_field_type=None):
         """
         """
         Performs set up for executing the spatial function.
         Performs set up for executing the spatial function.
         """
         """
@@ -301,86 +286,52 @@ class GeoQuerySet(QuerySet):
         if desc is None: desc = att
         if desc is None: desc = att
         if not func: raise ImproperlyConfigured('%s stored procedure not available.' % desc)
         if not func: raise ImproperlyConfigured('%s stored procedure not available.' % desc)
 
 
-        # Initializing the procedure arguments. 
+        # Initializing the procedure arguments.
         procedure_args = {'function' : func}
         procedure_args = {'function' : func}
-        
-        # Is there a geographic field in the model to perform this 
+
+        # Is there a geographic field in the model to perform this
         # operation on?
         # operation on?
         geo_field = self.query._geo_field(field_name)
         geo_field = self.query._geo_field(field_name)
         if not geo_field:
         if not geo_field:
             raise TypeError('%s output only available on GeometryFields.' % func)
             raise TypeError('%s output only available on GeometryFields.' % func)
 
 
-        # If the `geo_field_type` keyword was used, then enforce that 
+        # If the `geo_field_type` keyword was used, then enforce that
         # type limitation.
         # type limitation.
-        if not geo_field_type is None and not isinstance(geo_field, geo_field_type): 
-            raise TypeError('"%s" stored procedures may only be called on %ss.' % (func, geo_field_type.__name__)) 
+        if not geo_field_type is None and not isinstance(geo_field, geo_field_type):
+            raise TypeError('"%s" stored procedures may only be called on %ss.' % (func, geo_field_type.__name__))
 
 
         # Setting the procedure args.
         # Setting the procedure args.
-        procedure_args['geo_col'] = self._geocol_select(geo_field, field_name, aggregate)
+        procedure_args['geo_col'] = self._geocol_select(geo_field, field_name)
 
 
         return procedure_args, geo_field
         return procedure_args, geo_field
 
 
-    def _spatial_aggregate(self, att, field_name=None, 
-                           agg_field=None, convert_func=None, 
-                           geo_field_type=None, tolerance=0.0005):
+    def _spatial_aggregate(self, aggregate, field_name=None,
+                           geo_field_type=None, tolerance=0.05):
         """
         """
         DRY routine for calling aggregate spatial stored procedures and
         DRY routine for calling aggregate spatial stored procedures and
         returning their result to the caller of the function.
         returning their result to the caller of the function.
         """
         """
-        # Constructing the setup keyword arguments.
-        setup_kwargs = {'aggregate' : True,
-                        'field_name' : field_name,
-                        'geo_field_type' : geo_field_type,
-                        }
-        procedure_args, geo_field = self._spatial_setup(att, **setup_kwargs)
-        
-        if SpatialBackend.oracle:
-            procedure_args['tolerance'] = tolerance
-            # Adding in selection SQL for Oracle geometry columns.
-            if agg_field is GeometryField: 
-                agg_sql = '%s' % SpatialBackend.select
-            else: 
-                agg_sql = '%s'
-            agg_sql =  agg_sql % ('%(function)s(SDOAGGRTYPE(%(geo_col)s,%(tolerance)s))' % procedure_args)
-        else:
-            agg_sql = '%(function)s(%(geo_col)s)' % procedure_args
-
-        # Wrapping our selection SQL in `GeomSQL` to bypass quoting, and
-        # specifying the type of the aggregate field.
-        self.query.select = [GeomSQL(agg_sql)]
-        self.query.select_fields = [agg_field]
-
-        try:
-            # `asql` => not overriding `sql` module.
-            asql, params = self.query.as_sql()
-        except sql.datastructures.EmptyResultSet:
-            return None   
-
-        # Getting a cursor, executing the query, and extracting the returned
-        # value from the aggregate function.
-        cursor = connection.cursor()
-        cursor.execute(asql, params)
-        result = cursor.fetchone()[0]
-        
-        # If the `agg_field` is specified as a GeometryField, then autmatically
-        # set up the conversion function.
-        if agg_field is GeometryField and not callable(convert_func):
-            if SpatialBackend.postgis:
-                def convert_geom(hex, geo_field):
-                    if hex: return SpatialBackend.Geometry(hex)
-                    else: return None
-            elif SpatialBackend.oracle:
-                def convert_geom(clob, geo_field):
-                    if clob: return SpatialBackend.Geometry(clob.read(), geo_field._srid)
-                    else: return None
-            convert_func = convert_geom
-
-        # Returning the callback function evaluated on the result culled
-        # from the executed cursor.
-        if callable(convert_func):
-            return convert_func(result, geo_field)
-        else:
-            return result
+        # Getting the field the geographic aggregate will be called on.
+        geo_field = self.query._geo_field(field_name)
+        if not geo_field:
+            raise TypeError('%s aggregate only available on GeometryFields.' % aggregate.name)
+
+        # Checking if there are any geo field type limitations on this
+        # aggregate (e.g. ST_Makeline only operates on PointFields).
+        if not geo_field_type is None and not isinstance(geo_field, geo_field_type):
+            raise TypeError('%s aggregate may only be called on %ss.' % (aggregate.name, geo_field_type.__name__))
+
+        # Getting the string expression of the field name, as this is the
+        # argument taken by `Aggregate` objects.
+        agg_col = field_name or geo_field.name
+
+        # Adding any keyword parameters for the Aggregate object. Oracle backends
+        # in particular need an additional `tolerance` parameter.
+        agg_kwargs = {}
+        if SpatialBackend.oracle: agg_kwargs['tolerance'] = tolerance
+
+        # Calling the QuerySet.aggregate, and returning only the value of the aggregate.
+        return self.aggregate(_geoagg=aggregate(agg_col, **agg_kwargs))['_geoagg']
 
 
     def _spatial_attribute(self, att, settings, field_name=None, model_att=None):
     def _spatial_attribute(self, att, settings, field_name=None, model_att=None):
         """
         """
@@ -393,7 +344,7 @@ class GeoQuerySet(QuerySet):
           SQL function to call.
           SQL function to call.
 
 
          settings:
          settings:
-          Dictonary of internal settings to customize for the spatial procedure. 
+          Dictonary of internal settings to customize for the spatial procedure.
 
 
         Public Keyword Arguments:
         Public Keyword Arguments:
 
 
@@ -420,7 +371,7 @@ class GeoQuerySet(QuerySet):
             for k, v in default_args.iteritems(): settings['procedure_args'].setdefault(k, v)
             for k, v in default_args.iteritems(): settings['procedure_args'].setdefault(k, v)
         else:
         else:
             geo_field = settings['geo_field']
             geo_field = settings['geo_field']
-            
+
         # The attribute to attach to the model.
         # The attribute to attach to the model.
         if not isinstance(model_att, basestring): model_att = att
         if not isinstance(model_att, basestring): model_att = att
 
 
@@ -429,7 +380,7 @@ class GeoQuerySet(QuerySet):
             # Using the field's get_db_prep_lookup() to get any needed
             # Using the field's get_db_prep_lookup() to get any needed
             # transformation SQL -- we pass in a 'dummy' `contains` lookup.
             # transformation SQL -- we pass in a 'dummy' `contains` lookup.
             where, params = geo_field.get_db_prep_lookup('contains', settings['procedure_args'][name])
             where, params = geo_field.get_db_prep_lookup('contains', settings['procedure_args'][name])
-            # Replacing the procedure format with that of any needed 
+            # Replacing the procedure format with that of any needed
             # transformation SQL.
             # transformation SQL.
             old_fmt = '%%(%s)s' % name
             old_fmt = '%%(%s)s' % name
             new_fmt = where[0] % '%%s'
             new_fmt = where[0] % '%%s'
@@ -438,7 +389,7 @@ class GeoQuerySet(QuerySet):
 
 
         # Getting the format for the stored procedure.
         # Getting the format for the stored procedure.
         fmt = '%%(function)s(%s)' % settings['procedure_fmt']
         fmt = '%%(function)s(%s)' % settings['procedure_fmt']
-        
+
         # If the result of this function needs to be converted.
         # If the result of this function needs to be converted.
         if settings.get('select_field', False):
         if settings.get('select_field', False):
             sel_fld = settings['select_field']
             sel_fld = settings['select_field']
@@ -446,10 +397,10 @@ class GeoQuerySet(QuerySet):
                 self.query.custom_select[model_att] = SpatialBackend.select
                 self.query.custom_select[model_att] = SpatialBackend.select
             self.query.extra_select_fields[model_att] = sel_fld
             self.query.extra_select_fields[model_att] = sel_fld
 
 
-        # Finally, setting the extra selection attribute with 
+        # Finally, setting the extra selection attribute with
         # the format string expanded with the stored procedure
         # the format string expanded with the stored procedure
         # arguments.
         # arguments.
-        return self.extra(select={model_att : fmt % settings['procedure_args']}, 
+        return self.extra(select={model_att : fmt % settings['procedure_args']},
                           select_params=settings['select_params'])
                           select_params=settings['select_params'])
 
 
     def _distance_attribute(self, func, geom=None, tolerance=0.05, spheroid=False, **kwargs):
     def _distance_attribute(self, func, geom=None, tolerance=0.05, spheroid=False, **kwargs):
@@ -471,10 +422,10 @@ class GeoQuerySet(QuerySet):
         distance = func == 'distance'
         distance = func == 'distance'
         length = func == 'length'
         length = func == 'length'
         perimeter = func == 'perimeter'
         perimeter = func == 'perimeter'
-        if not (distance or length or perimeter): 
+        if not (distance or length or perimeter):
             raise ValueError('Unknown distance function: %s' % func)
             raise ValueError('Unknown distance function: %s' % func)
 
 
-        # The field's get_db_prep_lookup() is used to get any 
+        # The field's get_db_prep_lookup() is used to get any
         # extra distance parameters.  Here we set up the
         # extra distance parameters.  Here we set up the
         # parameters that will be passed in to field's function.
         # parameters that will be passed in to field's function.
         lookup_params = [geom or 'POINT (0 0)', 0]
         lookup_params = [geom or 'POINT (0 0)', 0]
@@ -482,12 +433,12 @@ class GeoQuerySet(QuerySet):
         # If the spheroid calculation is desired, either by the `spheroid`
         # If the spheroid calculation is desired, either by the `spheroid`
         # keyword or wehn calculating the length of geodetic field, make
         # keyword or wehn calculating the length of geodetic field, make
         # sure the 'spheroid' distance setting string is passed in so we
         # sure the 'spheroid' distance setting string is passed in so we
-        # get the correct spatial stored procedure.            
-        if spheroid or (SpatialBackend.postgis and geo_field.geodetic and length): 
-            lookup_params.append('spheroid') 
+        # get the correct spatial stored procedure.
+        if spheroid or (SpatialBackend.postgis and geo_field.geodetic and length):
+            lookup_params.append('spheroid')
         where, params = geo_field.get_db_prep_lookup('distance_lte', lookup_params)
         where, params = geo_field.get_db_prep_lookup('distance_lte', lookup_params)
 
 
-        # The `geom_args` flag is set to true if a geometry parameter was 
+        # The `geom_args` flag is set to true if a geometry parameter was
         # passed in.
         # passed in.
         geom_args = bool(geom)
         geom_args = bool(geom)
 
 
@@ -505,7 +456,7 @@ class GeoQuerySet(QuerySet):
                 geodetic = unit_name in geo_field.geodetic_units
                 geodetic = unit_name in geo_field.geodetic_units
             else:
             else:
                 geodetic = geo_field.geodetic
                 geodetic = geo_field.geodetic
-            
+
             if distance:
             if distance:
                 if self.query.transformed_srid:
                 if self.query.transformed_srid:
                     # Setting the `geom_args` flag to false because we want to handle
                     # Setting the `geom_args` flag to false because we want to handle
@@ -515,7 +466,7 @@ class GeoQuerySet(QuerySet):
                     geom_args = False
                     geom_args = False
                     procedure_fmt = '%s(%%(geo_col)s, %s)' % (SpatialBackend.transform, self.query.transformed_srid)
                     procedure_fmt = '%s(%%(geo_col)s, %s)' % (SpatialBackend.transform, self.query.transformed_srid)
                     if geom.srid is None or geom.srid == self.query.transformed_srid:
                     if geom.srid is None or geom.srid == self.query.transformed_srid:
-                        # If the geom parameter srid is None, it is assumed the coordinates 
+                        # If the geom parameter srid is None, it is assumed the coordinates
                         # are in the transformed units.  A placeholder is used for the
                         # are in the transformed units.  A placeholder is used for the
                         # geometry parameter.
                         # geometry parameter.
                         procedure_fmt += ', %%s'
                         procedure_fmt += ', %%s'
@@ -529,10 +480,10 @@ class GeoQuerySet(QuerySet):
 
 
                 if geodetic:
                 if geodetic:
                     # Spherical distance calculation is needed (because the geographic
                     # Spherical distance calculation is needed (because the geographic
-                    # field is geodetic). However, the PostGIS ST_distance_sphere/spheroid() 
+                    # field is geodetic). However, the PostGIS ST_distance_sphere/spheroid()
                     # procedures may only do queries from point columns to point geometries
                     # procedures may only do queries from point columns to point geometries
                     # some error checking is required.
                     # some error checking is required.
-                    if not isinstance(geo_field, PointField): 
+                    if not isinstance(geo_field, PointField):
                         raise TypeError('Spherical distance calculation only supported on PointFields.')
                         raise TypeError('Spherical distance calculation only supported on PointFields.')
                     if not str(SpatialBackend.Geometry(buffer(params[0].wkb)).geom_type) == 'Point':
                     if not str(SpatialBackend.Geometry(buffer(params[0].wkb)).geom_type) == 'Point':
                         raise TypeError('Spherical distance calculation only supported with Point Geometry parameters')
                         raise TypeError('Spherical distance calculation only supported with Point Geometry parameters')
@@ -553,12 +504,12 @@ class GeoQuerySet(QuerySet):
 
 
         # Setting up the settings for `_spatial_attribute`.
         # Setting up the settings for `_spatial_attribute`.
         s = {'select_field' : DistanceField(dist_att),
         s = {'select_field' : DistanceField(dist_att),
-             'setup' : False, 
+             'setup' : False,
              'geo_field' : geo_field,
              'geo_field' : geo_field,
              'procedure_args' : procedure_args,
              'procedure_args' : procedure_args,
              'procedure_fmt' : procedure_fmt,
              'procedure_fmt' : procedure_fmt,
              }
              }
-        if geom_args: 
+        if geom_args:
             s['geom_args'] = ('geom',)
             s['geom_args'] = ('geom',)
             s['procedure_args']['geom'] = geom
             s['procedure_args']['geom'] = geom
         elif geom:
         elif geom:
@@ -577,12 +528,12 @@ class GeoQuerySet(QuerySet):
             s['procedure_fmt'] = '%(geo_col)s,%(tolerance)s'
             s['procedure_fmt'] = '%(geo_col)s,%(tolerance)s'
             s['procedure_args'] = {'tolerance' : tolerance}
             s['procedure_args'] = {'tolerance' : tolerance}
         return self._spatial_attribute(func, s, **kwargs)
         return self._spatial_attribute(func, s, **kwargs)
-                     
+
     def _geomset_attribute(self, func, geom, tolerance=0.05, **kwargs):
     def _geomset_attribute(self, func, geom, tolerance=0.05, **kwargs):
         """
         """
         DRY routine for setting up a GeoQuerySet method that attaches a
         DRY routine for setting up a GeoQuerySet method that attaches a
         Geometry attribute and takes a Geoemtry parameter.  This is used
         Geometry attribute and takes a Geoemtry parameter.  This is used
-        for geometry set-like operations (e.g., intersection, difference, 
+        for geometry set-like operations (e.g., intersection, difference,
         union, sym_difference).
         union, sym_difference).
         """
         """
         s = {'geom_args' : ('geom',),
         s = {'geom_args' : ('geom',),
@@ -595,16 +546,12 @@ class GeoQuerySet(QuerySet):
             s['procedure_args']['tolerance'] = tolerance
             s['procedure_args']['tolerance'] = tolerance
         return self._spatial_attribute(func, s, **kwargs)
         return self._spatial_attribute(func, s, **kwargs)
 
 
-    def _geocol_select(self, geo_field, field_name, aggregate=False):
+    def _geocol_select(self, geo_field, field_name):
         """
         """
         Helper routine for constructing the SQL to select the geographic
         Helper routine for constructing the SQL to select the geographic
         column.  Takes into account if the geographic field is in a
         column.  Takes into account if the geographic field is in a
         ForeignKey relation to the current model.
         ForeignKey relation to the current model.
         """
         """
-        # If this is an aggregate spatial query, the flag needs to be
-        # set on the `GeoQuery` object of this queryset.
-        if aggregate: self.query.aggregate = True
-
         opts = self.model._meta
         opts = self.model._meta
         if not geo_field in opts.fields:
         if not geo_field in opts.fields:
             # Is this operation going to be on a related geographic field?
             # Is this operation going to be on a related geographic field?

+ 36 - 0
django/contrib/gis/db/models/sql/aggregates.py

@@ -0,0 +1,36 @@
+from django.db.models.sql.aggregates import *
+
+from django.contrib.gis.db.models.fields import GeometryField
+from django.contrib.gis.db.backend import SpatialBackend
+
+if SpatialBackend.oracle:
+    geo_template = '%(function)s(SDOAGGRTYPE(%(field)s,%(tolerance)s))'
+else:
+    geo_template = '%(function)s(%(field)s)'
+
+class GeoAggregate(Aggregate):
+    # Overriding the SQL template with the geographic one.
+    sql_template = geo_template
+
+    is_extent = False
+
+    def __init__(self, col, source=None, is_summary=False, **extra):
+        super(GeoAggregate, self).__init__(col, source, is_summary, **extra)
+
+        # Can't use geographic aggregates on non-geometry fields.
+        if not isinstance(self.source, GeometryField):
+            raise ValueError('Geospatial aggregates only allowed on geometry fields.')
+
+        # Making sure the SQL function is available for this spatial backend.
+        if not self.sql_function:
+            raise NotImplementedError('This aggregate functionality not implemented for your spatial backend.')
+
+class Extent(GeoAggregate):
+    is_extent = True
+    sql_function = SpatialBackend.extent
+
+class MakeLine(GeoAggregate):
+    sql_function = SpatialBackend.make_line
+
+class Union(GeoAggregate):
+    sql_function = SpatialBackend.unionagg

+ 85 - 45
django/contrib/gis/db/models/sql/query.py

@@ -5,6 +5,7 @@ from django.db.models.fields.related import ForeignKey
 
 
 from django.contrib.gis.db.backend import SpatialBackend
 from django.contrib.gis.db.backend import SpatialBackend
 from django.contrib.gis.db.models.fields import GeometryField
 from django.contrib.gis.db.models.fields import GeometryField
+from django.contrib.gis.db.models.sql import aggregates as gis_aggregates_module
 from django.contrib.gis.db.models.sql.where import GeoWhereNode
 from django.contrib.gis.db.models.sql.where import GeoWhereNode
 from django.contrib.gis.measure import Area, Distance
 from django.contrib.gis.measure import Area, Distance
 
 
@@ -12,12 +13,35 @@ from django.contrib.gis.measure import Area, Distance
 ALL_TERMS = sql.constants.QUERY_TERMS.copy()
 ALL_TERMS = sql.constants.QUERY_TERMS.copy()
 ALL_TERMS.update(SpatialBackend.gis_terms)
 ALL_TERMS.update(SpatialBackend.gis_terms)
 
 
+# Conversion functions used in normalizing geographic aggregates.
+if SpatialBackend.postgis:
+    def convert_extent(box):
+        # TODO: Parsing of BOX3D, Oracle support (patches welcome!)
+        # Box text will be something like "BOX(-90.0 30.0, -85.0 40.0)";
+        # parsing out and returning as a 4-tuple.
+        ll, ur = box[4:-1].split(',')
+        xmin, ymin = map(float, ll.split())
+        xmax, ymax = map(float, ur.split())
+        return (xmin, ymin, xmax, ymax)
+
+    def convert_geom(hex, geo_field):
+        if hex: return SpatialBackend.Geometry(hex)
+        else: return None
+else:
+    def convert_extent(box):
+        raise NotImplementedError('Aggregate extent not implemented for this spatial backend.')
+
+    def convert_geom(clob, geo_field):
+        if clob: return SpatialBackend.Geometry(clob.read(), geo_field._srid)
+        else: return None
+
 class GeoQuery(sql.Query):
 class GeoQuery(sql.Query):
     """
     """
     A single spatial SQL query.
     A single spatial SQL query.
     """
     """
     # Overridding the valid query terms.
     # Overridding the valid query terms.
     query_terms = ALL_TERMS
     query_terms = ALL_TERMS
+    aggregates_module = gis_aggregates_module
 
 
     #### Methods overridden from the base Query class ####
     #### Methods overridden from the base Query class ####
     def __init__(self, model, conn):
     def __init__(self, model, conn):
@@ -25,7 +49,6 @@ class GeoQuery(sql.Query):
         # The following attributes are customized for the GeoQuerySet.
         # The following attributes are customized for the GeoQuerySet.
         # The GeoWhereNode and SpatialBackend classes contain backend-specific
         # The GeoWhereNode and SpatialBackend classes contain backend-specific
         # routines and functions.
         # routines and functions.
-        self.aggregate = False
         self.custom_select = {}
         self.custom_select = {}
         self.transformed_srid = None
         self.transformed_srid = None
         self.extra_select_fields = {}
         self.extra_select_fields = {}
@@ -34,7 +57,6 @@ class GeoQuery(sql.Query):
         obj = super(GeoQuery, self).clone(*args, **kwargs)
         obj = super(GeoQuery, self).clone(*args, **kwargs)
         # Customized selection dictionary and transformed srid flag have
         # Customized selection dictionary and transformed srid flag have
         # to also be added to obj.
         # to also be added to obj.
-        obj.aggregate = self.aggregate
         obj.custom_select = self.custom_select.copy()
         obj.custom_select = self.custom_select.copy()
         obj.transformed_srid = self.transformed_srid
         obj.transformed_srid = self.transformed_srid
         obj.extra_select_fields = self.extra_select_fields.copy()
         obj.extra_select_fields = self.extra_select_fields.copy()
@@ -50,12 +72,12 @@ class GeoQuery(sql.Query):
         (without the table names) are given unique aliases. This is needed in
         (without the table names) are given unique aliases. This is needed in
         some cases to avoid ambiguitity with nested queries.
         some cases to avoid ambiguitity with nested queries.
 
 
-        This routine is overridden from Query to handle customized selection of 
+        This routine is overridden from Query to handle customized selection of
         geometry columns.
         geometry columns.
         """
         """
         qn = self.quote_name_unless_alias
         qn = self.quote_name_unless_alias
         qn2 = self.connection.ops.quote_name
         qn2 = self.connection.ops.quote_name
-        result = ['(%s) AS %s' % (self.get_extra_select_format(alias) % col[0], qn2(alias)) 
+        result = ['(%s) AS %s' % (self.get_extra_select_format(alias) % col[0], qn2(alias))
                   for alias, col in self.extra_select.iteritems()]
                   for alias, col in self.extra_select.iteritems()]
         aliases = set(self.extra_select.keys())
         aliases = set(self.extra_select.keys())
         if with_aliases:
         if with_aliases:
@@ -67,38 +89,53 @@ class GeoQuery(sql.Query):
             for col, field in izip(self.select, self.select_fields):
             for col, field in izip(self.select, self.select_fields):
                 if isinstance(col, (list, tuple)):
                 if isinstance(col, (list, tuple)):
                     r = self.get_field_select(field, col[0])
                     r = self.get_field_select(field, col[0])
-                    if with_aliases and col[1] in col_aliases:
-                        c_alias = 'Col%d' % len(col_aliases)
-                        result.append('%s AS %s' % (r, c_alias))
-                        aliases.add(c_alias)
-                        col_aliases.add(c_alias)
+                    if with_aliases:
+                        if col[1] in col_aliases:
+                            c_alias = 'Col%d' % len(col_aliases)
+                            result.append('%s AS %s' % (r, c_alias))
+                            aliases.add(c_alias)
+                            col_aliases.add(c_alias)
+                        else:
+                            result.append('%s AS %s' % (r, col[1]))
+                            aliases.add(r)
+                            col_aliases.add(col[1])
                     else:
                     else:
                         result.append(r)
                         result.append(r)
                         aliases.add(r)
                         aliases.add(r)
                         col_aliases.add(col[1])
                         col_aliases.add(col[1])
                 else:
                 else:
                     result.append(col.as_sql(quote_func=qn))
                     result.append(col.as_sql(quote_func=qn))
+
                     if hasattr(col, 'alias'):
                     if hasattr(col, 'alias'):
                         aliases.add(col.alias)
                         aliases.add(col.alias)
                         col_aliases.add(col.alias)
                         col_aliases.add(col.alias)
+
         elif self.default_cols:
         elif self.default_cols:
             cols, new_aliases = self.get_default_columns(with_aliases,
             cols, new_aliases = self.get_default_columns(with_aliases,
                     col_aliases)
                     col_aliases)
             result.extend(cols)
             result.extend(cols)
             aliases.update(new_aliases)
             aliases.update(new_aliases)
+
+        result.extend([
+                '%s%s' % (
+                    aggregate.as_sql(quote_func=qn),
+                    alias is not None and ' AS %s' % alias or ''
+                    )
+                for alias, aggregate in self.aggregate_select.items()
+                ])
+
         # This loop customized for GeoQuery.
         # This loop customized for GeoQuery.
-        if not self.aggregate:
-            for (table, col), field in izip(self.related_select_cols, self.related_select_fields):
-                r = self.get_field_select(field, table)
-                if with_aliases and col in col_aliases:
-                    c_alias = 'Col%d' % len(col_aliases)
-                    result.append('%s AS %s' % (r, c_alias))
-                    aliases.add(c_alias)
-                    col_aliases.add(c_alias)
-                else:
-                    result.append(r)
-                    aliases.add(r)
-                    col_aliases.add(col)
+        for (table, col), field in izip(self.related_select_cols, self.related_select_fields):
+            r = self.get_field_select(field, table)
+            if with_aliases and col in col_aliases:
+                c_alias = 'Col%d' % len(col_aliases)
+                result.append('%s AS %s' % (r, c_alias))
+                aliases.add(c_alias)
+                col_aliases.add(c_alias)
+            else:
+                result.append(r)
+                aliases.add(r)
+                col_aliases.add(col)
 
 
         self._select_aliases = aliases
         self._select_aliases = aliases
         return result
         return result
@@ -112,7 +149,7 @@ class GeoQuery(sql.Query):
         Returns a list of strings, quoted appropriately for use in SQL
         Returns a list of strings, quoted appropriately for use in SQL
         directly, as well as a set of aliases used in the select statement.
         directly, as well as a set of aliases used in the select statement.
 
 
-        This routine is overridden from Query to handle customized selection of 
+        This routine is overridden from Query to handle customized selection of
         geometry columns.
         geometry columns.
         """
         """
         result = []
         result = []
@@ -154,20 +191,10 @@ class GeoQuery(sql.Query):
             return result, None
             return result, None
         return result, aliases
         return result, aliases
 
 
-    def get_ordering(self):
-        """
-        This routine is overridden to disable ordering for aggregate
-        spatial queries.
-        """
-        if not self.aggregate:
-            return super(GeoQuery, self).get_ordering()
-        else:
-            return ()
-
     def resolve_columns(self, row, fields=()):
     def resolve_columns(self, row, fields=()):
         """
         """
         This routine is necessary so that distances and geometries returned
         This routine is necessary so that distances and geometries returned
-        from extra selection SQL get resolved appropriately into Python 
+        from extra selection SQL get resolved appropriately into Python
         objects.
         objects.
         """
         """
         values = []
         values = []
@@ -183,7 +210,7 @@ class GeoQuery(sql.Query):
 
 
         # Converting any extra selection values (e.g., geometries and
         # Converting any extra selection values (e.g., geometries and
         # distance objects added by GeoQuerySet methods).
         # distance objects added by GeoQuerySet methods).
-        values = [self.convert_values(v, self.extra_select_fields.get(a, None)) 
+        values = [self.convert_values(v, self.extra_select_fields.get(a, None))
                   for v, a in izip(row[rn_offset:index_start], aliases)]
                   for v, a in izip(row[rn_offset:index_start], aliases)]
         if SpatialBackend.oracle:
         if SpatialBackend.oracle:
             # This is what happens normally in OracleQuery's `resolve_columns`.
             # This is what happens normally in OracleQuery's `resolve_columns`.
@@ -212,6 +239,19 @@ class GeoQuery(sql.Query):
             value = SpatialBackend.Geometry(value)
             value = SpatialBackend.Geometry(value)
         return value
         return value
 
 
+    def resolve_aggregate(self, value, aggregate):
+        """
+        Overridden from GeoQuery's normalize to handle the conversion of
+        GeoAggregate objects.
+        """
+        if isinstance(aggregate, self.aggregates_module.GeoAggregate):
+            if aggregate.is_extent:
+                return convert_extent(value)
+            else:
+                return convert_geom(value, aggregate.source)
+        else:
+            return super(GeoQuery, self).resolve_aggregate(value, aggregate)
+
     #### Routines unique to GeoQuery ####
     #### Routines unique to GeoQuery ####
     def get_extra_select_format(self, alias):
     def get_extra_select_format(self, alias):
         sel_fmt = '%s'
         sel_fmt = '%s'
@@ -222,9 +262,9 @@ class GeoQuery(sql.Query):
     def get_field_select(self, fld, alias=None):
     def get_field_select(self, fld, alias=None):
         """
         """
         Returns the SELECT SQL string for the given field.  Figures out
         Returns the SELECT SQL string for the given field.  Figures out
-        if any custom selection SQL is needed for the column  The `alias` 
-        keyword may be used to manually specify the database table where 
-        the column exists, if not in the model associated with this 
+        if any custom selection SQL is needed for the column  The `alias`
+        keyword may be used to manually specify the database table where
+        the column exists, if not in the model associated with this
         `GeoQuery`.
         `GeoQuery`.
         """
         """
         sel_fmt = self.get_select_format(fld)
         sel_fmt = self.get_select_format(fld)
@@ -263,15 +303,15 @@ class GeoQuery(sql.Query):
         """
         """
         Recursive utility routine for checking the given name parameter
         Recursive utility routine for checking the given name parameter
         on the given model.  Initially, the name parameter is a string,
         on the given model.  Initially, the name parameter is a string,
-        of the field on the given model e.g., 'point', 'the_geom'. 
-        Related model field strings like 'address__point', may also be 
+        of the field on the given model e.g., 'point', 'the_geom'.
+        Related model field strings like 'address__point', may also be
         used.
         used.
 
 
-        If a GeometryField exists according to the given name parameter 
+        If a GeometryField exists according to the given name parameter
         it will be returned, otherwise returns False.
         it will be returned, otherwise returns False.
         """
         """
         if isinstance(name_param, basestring):
         if isinstance(name_param, basestring):
-            # This takes into account the situation where the name is a 
+            # This takes into account the situation where the name is a
             # lookup to a related geographic field, e.g., 'address__point'.
             # lookup to a related geographic field, e.g., 'address__point'.
             name_param = name_param.split(sql.constants.LOOKUP_SEP)
             name_param = name_param.split(sql.constants.LOOKUP_SEP)
             name_param.reverse() # Reversing so list operates like a queue of related lookups.
             name_param.reverse() # Reversing so list operates like a queue of related lookups.
@@ -284,7 +324,7 @@ class GeoQuery(sql.Query):
         except (FieldDoesNotExist, IndexError):
         except (FieldDoesNotExist, IndexError):
             return False
             return False
         # TODO: ManyToManyField?
         # TODO: ManyToManyField?
-        if isinstance(fld, GeometryField): 
+        if isinstance(fld, GeometryField):
             return fld # A-OK.
             return fld # A-OK.
         elif isinstance(fld, ForeignKey):
         elif isinstance(fld, ForeignKey):
             # ForeignKey encountered, return the output of this utility called
             # ForeignKey encountered, return the output of this utility called
@@ -297,12 +337,12 @@ class GeoQuery(sql.Query):
         """
         """
         Helper function that returns the database column for the given field.
         Helper function that returns the database column for the given field.
         The table and column are returned (quoted) in the proper format, e.g.,
         The table and column are returned (quoted) in the proper format, e.g.,
-        `"geoapp_city"."point"`.  If `table_alias` is not specified, the 
+        `"geoapp_city"."point"`.  If `table_alias` is not specified, the
         database table associated with the model of this `GeoQuery` will be
         database table associated with the model of this `GeoQuery` will be
         used.
         used.
         """
         """
         if table_alias is None: table_alias = self.model._meta.db_table
         if table_alias is None: table_alias = self.model._meta.db_table
-        return "%s.%s" % (self.quote_name_unless_alias(table_alias), 
+        return "%s.%s" % (self.quote_name_unless_alias(table_alias),
                           self.connection.ops.quote_name(field.column))
                           self.connection.ops.quote_name(field.column))
 
 
     def _geo_field(self, field_name=None):
     def _geo_field(self, field_name=None):
@@ -333,5 +373,5 @@ class DistanceField(object):
 
 
 # Rather than use GeometryField (which requires a SQL query
 # Rather than use GeometryField (which requires a SQL query
 # upon instantiation), use this lighter weight class.
 # upon instantiation), use this lighter weight class.
-class GeomField(object): 
+class GeomField(object):
     pass
     pass

+ 23 - 0
django/db/backends/__init__.py

@@ -10,6 +10,12 @@ except NameError:
     # Python 2.3 compat
     # Python 2.3 compat
     from sets import Set as set
     from sets import Set as set
 
 
+try:
+    import decimal
+except ImportError:
+    # Python 2.3 fallback
+    from django.utils import _decimal as decimal
+
 from django.db.backends import util
 from django.db.backends import util
 from django.utils import datetime_safe
 from django.utils import datetime_safe
 
 
@@ -62,6 +68,7 @@ class BaseDatabaseWrapper(local):
         return util.CursorDebugWrapper(cursor, self)
         return util.CursorDebugWrapper(cursor, self)
 
 
 class BaseDatabaseFeatures(object):
 class BaseDatabaseFeatures(object):
+    allows_group_by_pk = False
     # True if django.db.backend.utils.typecast_timestamp is used on values
     # True if django.db.backend.utils.typecast_timestamp is used on values
     # returned from dates() calls.
     # returned from dates() calls.
     needs_datetime_string_cast = True
     needs_datetime_string_cast = True
@@ -376,6 +383,22 @@ class BaseDatabaseOperations(object):
         """
         """
         return self.year_lookup_bounds(value)
         return self.year_lookup_bounds(value)
 
 
+    def convert_values(self, value, field):
+        """Coerce the value returned by the database backend into a consistent type that
+        is compatible with the field type.
+        """
+        internal_type = field.get_internal_type()
+        if internal_type == 'DecimalField':
+            return value
+        elif internal_type and internal_type.endswith('IntegerField') or internal_type == 'AutoField':
+            return int(value)
+        elif internal_type in ('DateField', 'DateTimeField', 'TimeField'):
+            return value
+        # No field, or the field isn't known to be a decimal or integer
+        # Default to a float
+        return float(value)
+
+
 class BaseDatabaseIntrospection(object):
 class BaseDatabaseIntrospection(object):
     """
     """
     This class encapsulates all backend-specific introspection utilities
     This class encapsulates all backend-specific introspection utilities

+ 1 - 0
django/db/backends/mysql/base.py

@@ -110,6 +110,7 @@ class CursorWrapper(object):
 class DatabaseFeatures(BaseDatabaseFeatures):
 class DatabaseFeatures(BaseDatabaseFeatures):
     empty_fetchmany_value = ()
     empty_fetchmany_value = ()
     update_can_self_select = False
     update_can_self_select = False
+    allows_group_by_pk = True
     related_fields_match_type = True
     related_fields_match_type = True
 
 
 class DatabaseOperations(BaseDatabaseOperations):
 class DatabaseOperations(BaseDatabaseOperations):

+ 9 - 11
django/db/backends/oracle/query.py

@@ -53,21 +53,23 @@ def query_class(QueryClass, Database):
             return values
             return values
 
 
         def convert_values(self, value, field):
         def convert_values(self, value, field):
-            from django.db.models.fields import DateField, DateTimeField, \
-                 TimeField, BooleanField, NullBooleanField, DecimalField, Field
+            from django.db.models.fields import Field
             if isinstance(value, Database.LOB):
             if isinstance(value, Database.LOB):
                 value = value.read()
                 value = value.read()
             # Oracle stores empty strings as null. We need to undo this in
             # Oracle stores empty strings as null. We need to undo this in
             # order to adhere to the Django convention of using the empty
             # order to adhere to the Django convention of using the empty
             # string instead of null, but only if the field accepts the
             # string instead of null, but only if the field accepts the
             # empty string.
             # empty string.
-            if value is None and isinstance(field, Field) and field.empty_strings_allowed:
+            if value is None and field and field.empty_strings_allowed:
                 value = u''
                 value = u''
             # Convert 1 or 0 to True or False
             # Convert 1 or 0 to True or False
-            elif value in (1, 0) and isinstance(field, (BooleanField, NullBooleanField)):
+            elif value in (1, 0) and field and field.get_internal_type() in ('BooleanField', 'NullBooleanField'):
                 value = bool(value)
                 value = bool(value)
+            # Force floats to the correct type
+            elif value is not None and field and field.get_internal_type() == 'FloatField':
+                value = float(value)
             # Convert floats to decimals
             # Convert floats to decimals
-            elif value is not None and isinstance(field, DecimalField):
+            elif value is not None and field and field.get_internal_type() == 'DecimalField':
                 value = util.typecast_decimal(field.format_number(value))
                 value = util.typecast_decimal(field.format_number(value))
             # cx_Oracle always returns datetime.datetime objects for
             # cx_Oracle always returns datetime.datetime objects for
             # DATE and TIMESTAMP columns, but Django wants to see a
             # DATE and TIMESTAMP columns, but Django wants to see a
@@ -86,13 +88,9 @@ def query_class(QueryClass, Database):
                     value = datetime.datetime(value.year, value.month,
                     value = datetime.datetime(value.year, value.month,
                             value.day, value.hour, value.minute, value.second,
                             value.day, value.hour, value.minute, value.second,
                             value.fsecond)
                             value.fsecond)
-                if isinstance(field, DateTimeField):
-                    # DateTimeField subclasses DateField so must be checked
-                    # first.
-                    pass
-                elif isinstance(field, DateField):
+                if field and field.get_internal_type() == 'DateField':
                     value = value.date()
                     value = value.date()
-                elif isinstance(field, TimeField) or (value.year == 1900 and value.month == value.day == 1):
+                elif field and field.get_internal_type() == 'TimeField' or (value.year == 1900 and value.month == value.day == 1):
                     value = value.time()
                     value = value.time()
                 elif value.hour == value.minute == value.second == value.microsecond == 0:
                 elif value.hour == value.minute == value.second == value.microsecond == 0:
                     value = value.date()
                     value = value.date()

+ 21 - 1
django/db/backends/sqlite3/base.py

@@ -10,7 +10,7 @@ from django.db.backends import *
 from django.db.backends.sqlite3.client import DatabaseClient
 from django.db.backends.sqlite3.client import DatabaseClient
 from django.db.backends.sqlite3.creation import DatabaseCreation
 from django.db.backends.sqlite3.creation import DatabaseCreation
 from django.db.backends.sqlite3.introspection import DatabaseIntrospection
 from django.db.backends.sqlite3.introspection import DatabaseIntrospection
-from django.utils.safestring import SafeString                                                           
+from django.utils.safestring import SafeString
 
 
 try:
 try:
     try:
     try:
@@ -102,6 +102,26 @@ class DatabaseOperations(BaseDatabaseOperations):
         second = '%s-12-31 23:59:59.999999'
         second = '%s-12-31 23:59:59.999999'
         return [first % value, second % value]
         return [first % value, second % value]
 
 
+    def convert_values(self, value, field):
+        """SQLite returns floats when it should be returning decimals,
+        and gets dates and datetimes wrong.
+        For consistency with other backends, coerce when required.
+        """
+        internal_type = field.get_internal_type()
+        if internal_type == 'DecimalField':
+            return util.typecast_decimal(field.format_number(value))
+        elif internal_type and internal_type.endswith('IntegerField') or internal_type == 'AutoField':
+            return int(value)
+        elif internal_type == 'DateField':
+            return util.typecast_date(value)
+        elif internal_type == 'DateTimeField':
+            return util.typecast_timestamp(value)
+        elif internal_type == 'TimeField':
+            return util.typecast_time(value)
+
+        # No field, or the field isn't known to be a decimal or integer
+        return value
+
 class DatabaseWrapper(BaseDatabaseWrapper):
 class DatabaseWrapper(BaseDatabaseWrapper):
 
 
     # SQLite requires LIKE statements to include an ESCAPE clause if the value
     # SQLite requires LIKE statements to include an ESCAPE clause if the value

+ 1 - 0
django/db/models/__init__.py

@@ -5,6 +5,7 @@ from django.db.models.loading import get_apps, get_app, get_models, get_model, r
 from django.db.models.query import Q
 from django.db.models.query import Q
 from django.db.models.manager import Manager
 from django.db.models.manager import Manager
 from django.db.models.base import Model
 from django.db.models.base import Model
+from django.db.models.aggregates import *
 from django.db.models.fields import *
 from django.db.models.fields import *
 from django.db.models.fields.subclassing import SubfieldBase
 from django.db.models.fields.subclassing import SubfieldBase
 from django.db.models.fields.files import FileField, ImageField
 from django.db.models.fields.files import FileField, ImageField

+ 66 - 0
django/db/models/aggregates.py

@@ -0,0 +1,66 @@
+"""
+Classes to represent the definitions of aggregate functions.
+"""
+
+class Aggregate(object):
+    """
+    Default Aggregate definition.
+    """
+    def __init__(self, lookup, **extra):
+        """Instantiate a new aggregate.
+
+         * lookup is the field on which the aggregate operates.
+         * extra is a dictionary of additional data to provide for the
+           aggregate definition
+
+        Also utilizes the class variables:
+         * name, the identifier for this aggregate function.
+        """
+        self.lookup = lookup
+        self.extra = extra
+
+    def _default_alias(self):
+        return '%s__%s' % (self.lookup, self.name.lower())
+    default_alias = property(_default_alias)
+
+    def add_to_query(self, query, alias, col, source, is_summary):
+        """Add the aggregate to the nominated query.
+
+        This method is used to convert the generic Aggregate definition into a
+        backend-specific definition.
+
+         * query is the backend-specific query instance to which the aggregate
+           is to be added.
+         * col is a column reference describing the subject field
+           of the aggregate. It can be an alias, or a tuple describing
+           a table and column name.
+         * source is the underlying field or aggregate definition for
+           the column reference. If the aggregate is not an ordinal or
+           computed type, this reference is used to determine the coerced
+           output type of the aggregate.
+         * is_summary is a boolean that is set True if the aggregate is a
+           summary value rather than an annotation.
+        """
+        aggregate = getattr(query.aggregates_module, self.name)
+        query.aggregate_select[alias] = aggregate(col, source=source, is_summary=is_summary, **self.extra)
+
+class Avg(Aggregate):
+    name = 'Avg'
+
+class Count(Aggregate):
+    name = 'Count'
+
+class Max(Aggregate):
+    name = 'Max'
+
+class Min(Aggregate):
+    name = 'Min'
+
+class StdDev(Aggregate):
+    name = 'StdDev'
+
+class Sum(Aggregate):
+    name = 'Sum'
+
+class Variance(Aggregate):
+    name = 'Variance'

+ 6 - 0
django/db/models/manager.py

@@ -101,6 +101,12 @@ class Manager(object):
     def filter(self, *args, **kwargs):
     def filter(self, *args, **kwargs):
         return self.get_query_set().filter(*args, **kwargs)
         return self.get_query_set().filter(*args, **kwargs)
 
 
+    def aggregate(self, *args, **kwargs):
+        return self.get_query_set().aggregate(*args, **kwargs)
+
+    def annotate(self, *args, **kwargs):
+        return self.get_query_set().annotate(*args, **kwargs)
+
     def complex_filter(self, *args, **kwargs):
     def complex_filter(self, *args, **kwargs):
         return self.get_query_set().complex_filter(*args, **kwargs)
         return self.get_query_set().complex_filter(*args, **kwargs)
 
 

+ 82 - 7
django/db/models/query.py

@@ -4,6 +4,7 @@ except NameError:
     from sets import Set as set     # Python 2.3 fallback
     from sets import Set as set     # Python 2.3 fallback
 
 
 from django.db import connection, transaction, IntegrityError
 from django.db import connection, transaction, IntegrityError
+from django.db.models.aggregates import Aggregate
 from django.db.models.fields import DateField
 from django.db.models.fields import DateField
 from django.db.models.query_utils import Q, select_related_descend
 from django.db.models.query_utils import Q, select_related_descend
 from django.db.models import signals, sql
 from django.db.models import signals, sql
@@ -270,18 +271,47 @@ class QuerySet(object):
         else:
         else:
             requested = None
             requested = None
         max_depth = self.query.max_depth
         max_depth = self.query.max_depth
+
         extra_select = self.query.extra_select.keys()
         extra_select = self.query.extra_select.keys()
+        aggregate_select = self.query.aggregate_select.keys()
+
         index_start = len(extra_select)
         index_start = len(extra_select)
+        aggregate_start = index_start + len(self.model._meta.fields)
+
         for row in self.query.results_iter():
         for row in self.query.results_iter():
             if fill_cache:
             if fill_cache:
-                obj, _ = get_cached_row(self.model, row, index_start,
-                        max_depth, requested=requested)
+                obj, aggregate_start = get_cached_row(self.model, row,
+                                    index_start, max_depth, requested=requested)
             else:
             else:
-                obj = self.model(*row[index_start:])
+                # omit aggregates in object creation
+                obj = self.model(*row[index_start:aggregate_start])
+
             for i, k in enumerate(extra_select):
             for i, k in enumerate(extra_select):
                 setattr(obj, k, row[i])
                 setattr(obj, k, row[i])
+
+            # Add the aggregates to the model
+            for i, aggregate in enumerate(aggregate_select):
+                setattr(obj, aggregate, row[i+aggregate_start])
+
             yield obj
             yield obj
 
 
+    def aggregate(self, *args, **kwargs):
+        """
+        Returns a dictionary containing the calculations (aggregation)
+        over the current queryset
+
+        If args is present the expression is passed as a kwarg using
+        the Aggregate object's default alias.
+        """
+        for arg in args:
+            kwargs[arg.default_alias] = arg
+
+        for (alias, aggregate_expr) in kwargs.items():
+            self.query.add_aggregate(aggregate_expr, self.model, alias,
+                is_summary=True)
+
+        return self.query.get_aggregation()
+
     def count(self):
     def count(self):
         """
         """
         Performs a SELECT COUNT() and returns the number of records as an
         Performs a SELECT COUNT() and returns the number of records as an
@@ -553,6 +583,25 @@ class QuerySet(object):
         """
         """
         self.query.select_related = other.query.select_related
         self.query.select_related = other.query.select_related
 
 
+    def annotate(self, *args, **kwargs):
+        """
+        Return a query set in which the returned objects have been annotated
+        with data aggregated from related fields.
+        """
+        for arg in args:
+            kwargs[arg.default_alias] = arg
+
+        obj = self._clone()
+
+        obj._setup_aggregate_query()
+
+        # Add the aggregates to the query
+        for (alias, aggregate_expr) in kwargs.items():
+            obj.query.add_aggregate(aggregate_expr, self.model, alias,
+                is_summary=False)
+
+        return obj
+
     def order_by(self, *field_names):
     def order_by(self, *field_names):
         """
         """
         Returns a new QuerySet instance with the ordering changed.
         Returns a new QuerySet instance with the ordering changed.
@@ -641,6 +690,16 @@ class QuerySet(object):
         """
         """
         pass
         pass
 
 
+    def _setup_aggregate_query(self):
+        """
+        Prepare the query for computing a result that contains aggregate annotations.
+        """
+        opts = self.model._meta
+        if not self.query.group_by:
+            field_names = [f.attname for f in opts.fields]
+            self.query.add_fields(field_names, False)
+            self.query.set_group_by()
+
     def as_sql(self):
     def as_sql(self):
         """
         """
         Returns the internal query's SQL and parameters (as a tuple).
         Returns the internal query's SQL and parameters (as a tuple).
@@ -669,6 +728,8 @@ class ValuesQuerySet(QuerySet):
             len(self.field_names) != len(self.model._meta.fields)):
             len(self.field_names) != len(self.model._meta.fields)):
             self.query.trim_extra_select(self.extra_names)
             self.query.trim_extra_select(self.extra_names)
         names = self.query.extra_select.keys() + self.field_names
         names = self.query.extra_select.keys() + self.field_names
+        names.extend(self.query.aggregate_select.keys())
+
         for row in self.query.results_iter():
         for row in self.query.results_iter():
             yield dict(zip(names, row))
             yield dict(zip(names, row))
 
 
@@ -682,20 +743,25 @@ class ValuesQuerySet(QuerySet):
         """
         """
         self.query.clear_select_fields()
         self.query.clear_select_fields()
         self.extra_names = []
         self.extra_names = []
+        self.aggregate_names = []
+
         if self._fields:
         if self._fields:
-            if not self.query.extra_select:
+            if not self.query.extra_select and not self.query.aggregate_select:
                 field_names = list(self._fields)
                 field_names = list(self._fields)
             else:
             else:
                 field_names = []
                 field_names = []
                 for f in self._fields:
                 for f in self._fields:
                     if self.query.extra_select.has_key(f):
                     if self.query.extra_select.has_key(f):
                         self.extra_names.append(f)
                         self.extra_names.append(f)
+                    elif self.query.aggregate_select.has_key(f):
+                        self.aggregate_names.append(f)
                     else:
                     else:
                         field_names.append(f)
                         field_names.append(f)
         else:
         else:
             # Default to all fields.
             # Default to all fields.
             field_names = [f.attname for f in self.model._meta.fields]
             field_names = [f.attname for f in self.model._meta.fields]
 
 
+        self.query.select = []
         self.query.add_fields(field_names, False)
         self.query.add_fields(field_names, False)
         self.query.default_cols = False
         self.query.default_cols = False
         self.field_names = field_names
         self.field_names = field_names
@@ -711,6 +777,7 @@ class ValuesQuerySet(QuerySet):
             c._fields = self._fields[:]
             c._fields = self._fields[:]
         c.field_names = self.field_names
         c.field_names = self.field_names
         c.extra_names = self.extra_names
         c.extra_names = self.extra_names
+        c.aggregate_names = self.aggregate_names
         if setup and hasattr(c, '_setup_query'):
         if setup and hasattr(c, '_setup_query'):
             c._setup_query()
             c._setup_query()
         return c
         return c
@@ -718,10 +785,18 @@ class ValuesQuerySet(QuerySet):
     def _merge_sanity_check(self, other):
     def _merge_sanity_check(self, other):
         super(ValuesQuerySet, self)._merge_sanity_check(other)
         super(ValuesQuerySet, self)._merge_sanity_check(other)
         if (set(self.extra_names) != set(other.extra_names) or
         if (set(self.extra_names) != set(other.extra_names) or
-                set(self.field_names) != set(other.field_names)):
+                set(self.field_names) != set(other.field_names) or
+                self.aggregate_names != other.aggregate_names):
             raise TypeError("Merging '%s' classes must involve the same values in each case."
             raise TypeError("Merging '%s' classes must involve the same values in each case."
                     % self.__class__.__name__)
                     % self.__class__.__name__)
 
 
+    def _setup_aggregate_query(self):
+        """
+        Prepare the query for computing a result that contains aggregate annotations.
+        """
+        self.query.set_group_by()
+
+        super(ValuesQuerySet, self)._setup_aggregate_query()
 
 
 class ValuesListQuerySet(ValuesQuerySet):
 class ValuesListQuerySet(ValuesQuerySet):
     def iterator(self):
     def iterator(self):
@@ -729,14 +804,14 @@ class ValuesListQuerySet(ValuesQuerySet):
         if self.flat and len(self._fields) == 1:
         if self.flat and len(self._fields) == 1:
             for row in self.query.results_iter():
             for row in self.query.results_iter():
                 yield row[0]
                 yield row[0]
-        elif not self.query.extra_select:
+        elif not self.query.extra_select and not self.query.aggregate_select:
             for row in self.query.results_iter():
             for row in self.query.results_iter():
                 yield tuple(row)
                 yield tuple(row)
         else:
         else:
             # When extra(select=...) is involved, the extra cols are
             # When extra(select=...) is involved, the extra cols are
             # always at the start of the row, so we need to reorder the fields
             # always at the start of the row, so we need to reorder the fields
             # to match the order in self._fields.
             # to match the order in self._fields.
-            names = self.query.extra_select.keys() + self.field_names
+            names = self.query.extra_select.keys() + self.field_names + self.query.aggregate_select.keys()
             for row in self.query.results_iter():
             for row in self.query.results_iter():
                 data = dict(zip(names, row))
                 data = dict(zip(names, row))
                 yield tuple([data[f] for f in self._fields])
                 yield tuple([data[f] for f in self._fields])

+ 0 - 1
django/db/models/query_utils.py

@@ -64,4 +64,3 @@ def select_related_descend(field, restricted, requested):
     if not restricted and field.null:
     if not restricted and field.null:
         return False
         return False
     return True
     return True
-

+ 130 - 0
django/db/models/sql/aggregates.py

@@ -0,0 +1,130 @@
+"""
+Classes to represent the default SQL aggregate functions
+"""
+
+class AggregateField(object):
+    """An internal field mockup used to identify aggregates in the
+    data-conversion parts of the database backend.
+    """
+    def __init__(self, internal_type):
+        self.internal_type = internal_type
+    def get_internal_type(self):
+        return self.internal_type
+
+ordinal_aggregate_field = AggregateField('IntegerField')
+computed_aggregate_field = AggregateField('FloatField')
+
+class Aggregate(object):
+    """
+    Default SQL Aggregate.
+    """
+    is_ordinal = False
+    is_computed = False
+    sql_template = '%(function)s(%(field)s)'
+
+    def __init__(self, col, source=None, is_summary=False, **extra):
+        """Instantiate an SQL aggregate
+
+         * col is a column reference describing the subject field
+           of the aggregate. It can be an alias, or a tuple describing
+           a table and column name.
+         * source is the underlying field or aggregate definition for
+           the column reference. If the aggregate is not an ordinal or
+           computed type, this reference is used to determine the coerced
+           output type of the aggregate.
+         * extra is a dictionary of additional data to provide for the
+           aggregate definition
+
+        Also utilizes the class variables:
+         * sql_function, the name of the SQL function that implements the
+           aggregate.
+         * sql_template, a template string that is used to render the
+           aggregate into SQL.
+         * is_ordinal, a boolean indicating if the output of this aggregate
+           is an integer (e.g., a count)
+         * is_computed, a boolean indicating if the output of this aggregate
+           is a computed float (e.g., an average), regardless of the input
+           type.
+
+        """
+        self.col = col
+        self.source = source
+        self.is_summary = is_summary
+        self.extra = extra
+
+        # Follow the chain of aggregate sources back until you find an
+        # actual field, or an aggregate that forces a particular output
+        # type. This type of this field will be used to coerce values
+        # retrieved from the database.
+        tmp = self
+
+        while tmp and isinstance(tmp, Aggregate):
+            if getattr(tmp, 'is_ordinal', False):
+                tmp = ordinal_aggregate_field
+            elif getattr(tmp, 'is_computed', False):
+                tmp = computed_aggregate_field
+            else:
+                tmp = tmp.source
+
+        self.field = tmp
+
+    def relabel_aliases(self, change_map):
+        if isinstance(self.col, (list, tuple)):
+            self.col = (change_map.get(self.col[0], self.col[0]), self.col[1])
+
+    def as_sql(self, quote_func=None):
+        "Return the aggregate, rendered as SQL."
+        if not quote_func:
+            quote_func = lambda x: x
+
+        if hasattr(self.col, 'as_sql'):
+            field_name = self.col.as_sql(quote_func)
+        elif isinstance(self.col, (list, tuple)):
+            field_name = '.'.join([quote_func(c) for c in self.col])
+        else:
+            field_name = self.col
+
+        params = {
+            'function': self.sql_function,
+            'field': field_name
+        }
+        params.update(self.extra)
+
+        return self.sql_template % params
+
+
+class Avg(Aggregate):
+    is_computed = True
+    sql_function = 'AVG'
+
+class Count(Aggregate):
+    is_ordinal = True
+    sql_function = 'COUNT'
+    sql_template = '%(function)s(%(distinct)s%(field)s)'
+
+    def __init__(self, col, distinct=False, **extra):
+        super(Count, self).__init__(col, distinct=distinct and 'DISTINCT ' or '', **extra)
+
+class Max(Aggregate):
+    sql_function = 'MAX'
+
+class Min(Aggregate):
+    sql_function = 'MIN'
+
+class StdDev(Aggregate):
+    is_computed = True
+
+    def __init__(self, col, sample=False, **extra):
+        super(StdDev, self).__init__(col, **extra)
+        self.sql_function = sample and 'STDDEV_SAMP' or 'STDDEV_POP'
+
+class Sum(Aggregate):
+    sql_function = 'SUM'
+
+class Variance(Aggregate):
+    is_computed = True
+
+    def __init__(self, col, sample=False, **extra):
+        super(Variance, self).__init__(col, **extra)
+        self.sql_function = sample and 'VAR_SAMP' or 'VAR_POP'
+

+ 0 - 53
django/db/models/sql/datastructures.py

@@ -25,59 +25,6 @@ class RawValue(object):
     def __init__(self, value):
     def __init__(self, value):
         self.value = value
         self.value = value
 
 
-class Aggregate(object):
-    """
-    Base class for all aggregate-related classes (min, max, avg, count, sum).
-    """
-    def relabel_aliases(self, change_map):
-        """
-        Relabel the column alias, if necessary. Must be implemented by
-        subclasses.
-        """
-        raise NotImplementedError
-
-    def as_sql(self, quote_func=None):
-        """
-        Returns the SQL string fragment for this object.
-
-        The quote_func function is used to quote the column components. If
-        None, it defaults to doing nothing.
-
-        Must be implemented by subclasses.
-        """
-        raise NotImplementedError
-
-class Count(Aggregate):
-    """
-    Perform a count on the given column.
-    """
-    def __init__(self, col='*', distinct=False):
-        """
-        Set the column to count on (defaults to '*') and set whether the count
-        should be distinct or not.
-        """
-        self.col = col
-        self.distinct = distinct
-
-    def relabel_aliases(self, change_map):
-        c = self.col
-        if isinstance(c, (list, tuple)):
-            self.col = (change_map.get(c[0], c[0]), c[1])
-
-    def as_sql(self, quote_func=None):
-        if not quote_func:
-            quote_func = lambda x: x
-        if isinstance(self.col, (list, tuple)):
-            col = ('%s.%s' % tuple([quote_func(c) for c in self.col]))
-        elif hasattr(self.col, 'as_sql'):
-            col = self.col.as_sql(quote_func)
-        else:
-            col = self.col
-        if self.distinct:
-            return 'COUNT(DISTINCT %s)' % col
-        else:
-            return 'COUNT(%s)' % col
-
 class Date(object):
 class Date(object):
     """
     """
     Add a date selection column.
     Add a date selection column.

+ 239 - 68
django/db/models/sql/query.py

@@ -12,12 +12,13 @@ from copy import deepcopy
 from django.utils.tree import Node
 from django.utils.tree import Node
 from django.utils.datastructures import SortedDict
 from django.utils.datastructures import SortedDict
 from django.utils.encoding import force_unicode
 from django.utils.encoding import force_unicode
+from django.db.backends.util import truncate_name
 from django.db import connection
 from django.db import connection
 from django.db.models import signals
 from django.db.models import signals
 from django.db.models.fields import FieldDoesNotExist
 from django.db.models.fields import FieldDoesNotExist
 from django.db.models.query_utils import select_related_descend
 from django.db.models.query_utils import select_related_descend
+from django.db.models.sql import aggregates as base_aggregates_module
 from django.db.models.sql.where import WhereNode, Constraint, EverythingNode, AND, OR
 from django.db.models.sql.where import WhereNode, Constraint, EverythingNode, AND, OR
-from django.db.models.sql.datastructures import Count
 from django.core.exceptions import FieldError
 from django.core.exceptions import FieldError
 from datastructures import EmptyResultSet, Empty, MultiJoin
 from datastructures import EmptyResultSet, Empty, MultiJoin
 from constants import *
 from constants import *
@@ -40,6 +41,7 @@ class BaseQuery(object):
 
 
     alias_prefix = 'T'
     alias_prefix = 'T'
     query_terms = QUERY_TERMS
     query_terms = QUERY_TERMS
+    aggregates_module = base_aggregates_module
 
 
     def __init__(self, model, connection, where=WhereNode):
     def __init__(self, model, connection, where=WhereNode):
         self.model = model
         self.model = model
@@ -73,6 +75,9 @@ class BaseQuery(object):
         self.select_related = False
         self.select_related = False
         self.related_select_cols = []
         self.related_select_cols = []
 
 
+        # SQL aggregate-related attributes
+        self.aggregate_select = SortedDict() # Maps alias -> SQL aggregate function
+
         # Arbitrary maximum limit for select_related. Prevents infinite
         # Arbitrary maximum limit for select_related. Prevents infinite
         # recursion. Can be changed by the depth parameter to select_related().
         # recursion. Can be changed by the depth parameter to select_related().
         self.max_depth = 5
         self.max_depth = 5
@@ -178,6 +183,7 @@ class BaseQuery(object):
         obj.distinct = self.distinct
         obj.distinct = self.distinct
         obj.select_related = self.select_related
         obj.select_related = self.select_related
         obj.related_select_cols = []
         obj.related_select_cols = []
+        obj.aggregate_select = self.aggregate_select.copy()
         obj.max_depth = self.max_depth
         obj.max_depth = self.max_depth
         obj.extra_select = self.extra_select.copy()
         obj.extra_select = self.extra_select.copy()
         obj.extra_tables = self.extra_tables
         obj.extra_tables = self.extra_tables
@@ -194,6 +200,35 @@ class BaseQuery(object):
             obj._setup_query()
             obj._setup_query()
         return obj
         return obj
 
 
+    def convert_values(self, value, field):
+        """Convert the database-returned value into a type that is consistent
+        across database backends.
+
+        By default, this defers to the underlying backend operations, but
+        it can be overridden by Query classes for specific backends.
+        """
+        return self.connection.ops.convert_values(value, field)
+
+    def resolve_aggregate(self, value, aggregate):
+        """Resolve the value of aggregates returned by the database to
+        consistent (and reasonable) types.
+
+        This is required because of the predisposition of certain backends
+        to return Decimal and long types when they are not needed.
+        """
+        if value is None:
+            # Return None as-is
+            return value
+        elif aggregate.is_ordinal:
+            # Any ordinal aggregate (e.g., count) returns an int
+            return int(value)
+        elif aggregate.is_computed:
+            # Any computed aggregate (e.g., avg) returns a float
+            return float(value)
+        else:
+            # Return value depends on the type of the field being processed.
+            return self.convert_values(value, aggregate.field)
+
     def results_iter(self):
     def results_iter(self):
         """
         """
         Returns an iterator over the results from executing this query.
         Returns an iterator over the results from executing this query.
@@ -212,29 +247,78 @@ class BaseQuery(object):
                         else:
                         else:
                             fields = self.model._meta.fields
                             fields = self.model._meta.fields
                     row = self.resolve_columns(row, fields)
                     row = self.resolve_columns(row, fields)
+
+                if self.aggregate_select:
+                    aggregate_start = len(self.extra_select.keys()) + len(self.select)
+                    row = tuple(row[:aggregate_start]) + tuple([
+                        self.resolve_aggregate(value, aggregate)
+                        for (alias, aggregate), value
+                        in zip(self.aggregate_select.items(), row[aggregate_start:])
+                    ])
+
                 yield row
                 yield row
 
 
+    def get_aggregation(self):
+        """
+        Returns the dictionary with the values of the existing aggregations.
+        """
+        if not self.aggregate_select:
+            return {}
+
+        # If there is a group by clause, aggregating does not add useful
+        # information but retrieves only the first row. Aggregate
+        # over the subquery instead.
+        if self.group_by:
+            from subqueries import AggregateQuery
+            query = AggregateQuery(self.model, self.connection)
+
+            obj = self.clone()
+
+            # Remove any aggregates marked for reduction from the subquery
+            # and move them to the outer AggregateQuery.
+            for alias, aggregate in self.aggregate_select.items():
+                if aggregate.is_summary:
+                    query.aggregate_select[alias] = aggregate
+                    del obj.aggregate_select[alias]
+
+            query.add_subquery(obj)
+        else:
+            query = self
+            self.select = []
+            self.default_cols = False
+            self.extra_select = {}
+
+        query.clear_ordering(True)
+        query.clear_limits()
+        query.select_related = False
+        query.related_select_cols = []
+        query.related_select_fields = []
+
+        return dict([
+            (alias, self.resolve_aggregate(val, aggregate))
+            for (alias, aggregate), val
+            in zip(query.aggregate_select.items(), query.execute_sql(SINGLE))
+        ])
+
     def get_count(self):
     def get_count(self):
         """
         """
         Performs a COUNT() query using the current filter constraints.
         Performs a COUNT() query using the current filter constraints.
         """
         """
-        from subqueries import CountQuery
         obj = self.clone()
         obj = self.clone()
-        obj.clear_ordering(True)
-        obj.clear_limits()
-        obj.select_related = False
-        obj.related_select_cols = []
-        obj.related_select_fields = []
-        if len(obj.select) > 1:
-            obj = self.clone(CountQuery, _query=obj, where=self.where_class(),
-                    distinct=False)
-            obj.select = []
-            obj.extra_select = SortedDict()
+        if len(self.select) > 1:
+            # If a select clause exists, then the query has already started to
+            # specify the columns that are to be returned.
+            # In this case, we need to use a subquery to evaluate the count.
+            from subqueries import AggregateQuery
+            subquery = obj
+            subquery.clear_ordering(True)
+            subquery.clear_limits()
+
+            obj = AggregateQuery(obj.model, obj.connection)
+            obj.add_subquery(subquery)
+
         obj.add_count_column()
         obj.add_count_column()
-        data = obj.execute_sql(SINGLE)
-        if not data:
-            return 0
-        number = data[0]
+        number = obj.get_aggregation()[None]
 
 
         # Apply offset and limit constraints manually, since using LIMIT/OFFSET
         # Apply offset and limit constraints manually, since using LIMIT/OFFSET
         # in SQL (in variants that provide them) doesn't change the COUNT
         # in SQL (in variants that provide them) doesn't change the COUNT
@@ -450,25 +534,41 @@ class BaseQuery(object):
             for col in self.select:
             for col in self.select:
                 if isinstance(col, (list, tuple)):
                 if isinstance(col, (list, tuple)):
                     r = '%s.%s' % (qn(col[0]), qn(col[1]))
                     r = '%s.%s' % (qn(col[0]), qn(col[1]))
-                    if with_aliases and col[1] in col_aliases:
-                        c_alias = 'Col%d' % len(col_aliases)
-                        result.append('%s AS %s' % (r, c_alias))
-                        aliases.add(c_alias)
-                        col_aliases.add(c_alias)
+                    if with_aliases:
+                        if col[1] in col_aliases:
+                            c_alias = 'Col%d' % len(col_aliases)
+                            result.append('%s AS %s' % (r, c_alias))
+                            aliases.add(c_alias)
+                            col_aliases.add(c_alias)
+                        else:
+                            result.append('%s AS %s' % (r, col[1]))
+                            aliases.add(r)
+                            col_aliases.add(col[1])
                     else:
                     else:
                         result.append(r)
                         result.append(r)
                         aliases.add(r)
                         aliases.add(r)
                         col_aliases.add(col[1])
                         col_aliases.add(col[1])
                 else:
                 else:
                     result.append(col.as_sql(quote_func=qn))
                     result.append(col.as_sql(quote_func=qn))
+
                     if hasattr(col, 'alias'):
                     if hasattr(col, 'alias'):
                         aliases.add(col.alias)
                         aliases.add(col.alias)
                         col_aliases.add(col.alias)
                         col_aliases.add(col.alias)
+
         elif self.default_cols:
         elif self.default_cols:
             cols, new_aliases = self.get_default_columns(with_aliases,
             cols, new_aliases = self.get_default_columns(with_aliases,
                     col_aliases)
                     col_aliases)
             result.extend(cols)
             result.extend(cols)
             aliases.update(new_aliases)
             aliases.update(new_aliases)
+
+        result.extend([
+            '%s%s' % (
+                aggregate.as_sql(quote_func=qn),
+                alias is not None and ' AS %s' % qn(alias) or ''
+            )
+            for alias, aggregate in self.aggregate_select.items()
+        ])
+
         for table, col in self.related_select_cols:
         for table, col in self.related_select_cols:
             r = '%s.%s' % (qn(table), qn(col))
             r = '%s.%s' % (qn(table), qn(col))
             if with_aliases and col in col_aliases:
             if with_aliases and col in col_aliases:
@@ -538,7 +638,7 @@ class BaseQuery(object):
         Returns a list of strings that are joined together to go after the
         Returns a list of strings that are joined together to go after the
         "FROM" part of the query, as well as a list of any extra parameters that
         "FROM" part of the query, as well as a list of any extra parameters that
         need to be included. Subclasses can override this to create a
         need to be included. Subclasses can override this to create a
-        from-clause via a "select", for example (e.g. CountQuery).
+        from-clause via a "select".
 
 
         This should only be called after any SQL construction methods that
         This should only be called after any SQL construction methods that
         might change the tables we need. This means the select columns and
         might change the tables we need. This means the select columns and
@@ -635,10 +735,13 @@ class BaseQuery(object):
                     order = asc
                     order = asc
                 result.append('%s %s' % (field, order))
                 result.append('%s %s' % (field, order))
                 continue
                 continue
+            col, order = get_order_dir(field, asc)
+            if col in self.aggregate_select:
+                result.append('%s %s' % (col, order))
+                continue
             if '.' in field:
             if '.' in field:
                 # This came in through an extra(order_by=...) addition. Pass it
                 # This came in through an extra(order_by=...) addition. Pass it
                 # on verbatim.
                 # on verbatim.
-                col, order = get_order_dir(field, asc)
                 table, col = col.split('.', 1)
                 table, col = col.split('.', 1)
                 if (table, col) not in processed_pairs:
                 if (table, col) not in processed_pairs:
                     elt = '%s.%s' % (qn(table), col)
                     elt = '%s.%s' % (qn(table), col)
@@ -657,7 +760,6 @@ class BaseQuery(object):
                             ordering_aliases.append(elt)
                             ordering_aliases.append(elt)
                         result.append('%s %s' % (elt, order))
                         result.append('%s %s' % (elt, order))
             else:
             else:
-                col, order = get_order_dir(field, asc)
                 elt = qn2(col)
                 elt = qn2(col)
                 if distinct and col not in select_aliases:
                 if distinct and col not in select_aliases:
                     ordering_aliases.append(elt)
                     ordering_aliases.append(elt)
@@ -1068,6 +1170,48 @@ class BaseQuery(object):
             self.fill_related_selections(f.rel.to._meta, alias, cur_depth + 1,
             self.fill_related_selections(f.rel.to._meta, alias, cur_depth + 1,
                     used, next, restricted, new_nullable, dupe_set, avoid)
                     used, next, restricted, new_nullable, dupe_set, avoid)
 
 
+    def add_aggregate(self, aggregate, model, alias, is_summary):
+        """
+        Adds a single aggregate expression to the Query
+        """
+        opts = model._meta
+        field_list = aggregate.lookup.split(LOOKUP_SEP)
+        if (len(field_list) == 1 and
+            aggregate.lookup in self.aggregate_select.keys()):
+            # Aggregate is over an annotation
+            field_name = field_list[0]
+            col = field_name
+            source = self.aggregate_select[field_name]
+        elif (len(field_list) > 1 or
+            field_list[0] not in [i.name for i in opts.fields]):
+            field, source, opts, join_list, last, _ = self.setup_joins(
+                field_list, opts, self.get_initial_alias(), False)
+
+            # Process the join chain to see if it can be trimmed
+            _, _, col, _, join_list = self.trim_joins(source, join_list, last, False)
+
+            # If the aggregate references a model or field that requires a join,
+            # those joins must be LEFT OUTER - empty join rows must be returned
+            # in order for zeros to be returned for those aggregates.
+            for column_alias in join_list:
+                self.promote_alias(column_alias, unconditional=True)
+
+            col = (join_list[-1], col)
+        else:
+            # Aggregate references a normal field
+            field_name = field_list[0]
+            source = opts.get_field(field_name)
+            if not (self.group_by and is_summary):
+                # Only use a column alias if this is a
+                # standalone aggregate, or an annotation
+                col = (opts.db_table, source.column)
+            else:
+                col = field_name
+
+        # Add the aggregate to the query
+        alias = truncate_name(alias, self.connection.ops.max_name_length())
+        aggregate.add_to_query(self, alias, col=col, source=source, is_summary=is_summary)
+
     def add_filter(self, filter_expr, connector=AND, negate=False, trim=False,
     def add_filter(self, filter_expr, connector=AND, negate=False, trim=False,
             can_reuse=None, process_extras=True):
             can_reuse=None, process_extras=True):
         """
         """
@@ -1119,6 +1263,11 @@ class BaseQuery(object):
         elif callable(value):
         elif callable(value):
             value = value()
             value = value()
 
 
+        for alias, aggregate in self.aggregate_select.items():
+            if alias == parts[0]:
+                self.having.add((aggregate, lookup_type, value), AND)
+                return
+
         opts = self.get_meta()
         opts = self.get_meta()
         alias = self.get_initial_alias()
         alias = self.get_initial_alias()
         allow_many = trim or not negate
         allow_many = trim or not negate
@@ -1131,38 +1280,9 @@ class BaseQuery(object):
             self.split_exclude(filter_expr, LOOKUP_SEP.join(parts[:e.level]),
             self.split_exclude(filter_expr, LOOKUP_SEP.join(parts[:e.level]),
                     can_reuse)
                     can_reuse)
             return
             return
-        final = len(join_list)
-        penultimate = last.pop()
-        if penultimate == final:
-            penultimate = last.pop()
-        if trim and len(join_list) > 1:
-            extra = join_list[penultimate:]
-            join_list = join_list[:penultimate]
-            final = penultimate
-            penultimate = last.pop()
-            col = self.alias_map[extra[0]][LHS_JOIN_COL]
-            for alias in extra:
-                self.unref_alias(alias)
-        else:
-            col = target.column
-        alias = join_list[-1]
 
 
-        while final > 1:
-            # An optimization: if the final join is against the same column as
-            # we are comparing against, we can go back one step in the join
-            # chain and compare against the lhs of the join instead (and then
-            # repeat the optimization). The result, potentially, involves less
-            # table joins.
-            join = self.alias_map[alias]
-            if col != join[RHS_JOIN_COL]:
-                break
-            self.unref_alias(alias)
-            alias = join[LHS_ALIAS]
-            col = join[LHS_JOIN_COL]
-            join_list = join_list[:-1]
-            final -= 1
-            if final == penultimate:
-                penultimate = last.pop()
+        # Process the join chain to see if it can be trimmed
+        final, penultimate, col, alias, join_list = self.trim_joins(target, join_list, last, trim)
 
 
         if (lookup_type == 'isnull' and value is True and not negate and
         if (lookup_type == 'isnull' and value is True and not negate and
                 final > 1):
                 final > 1):
@@ -1313,7 +1433,7 @@ class BaseQuery(object):
                         field, model, direct, m2m = opts.get_field_by_name(f.name)
                         field, model, direct, m2m = opts.get_field_by_name(f.name)
                         break
                         break
                 else:
                 else:
-                    names = opts.get_all_field_names()
+                    names = opts.get_all_field_names() + self.aggregate_select.keys()
                     raise FieldError("Cannot resolve keyword %r into field. "
                     raise FieldError("Cannot resolve keyword %r into field. "
                             "Choices are: %s" % (name, ", ".join(names)))
                             "Choices are: %s" % (name, ", ".join(names)))
 
 
@@ -1462,6 +1582,43 @@ class BaseQuery(object):
 
 
         return field, target, opts, joins, last, extra_filters
         return field, target, opts, joins, last, extra_filters
 
 
+    def trim_joins(self, target, join_list, last, trim):
+        """An optimization: if the final join is against the same column as
+        we are comparing against, we can go back one step in a join
+        chain and compare against the LHS of the join instead (and then
+        repeat the optimization). The result, potentially, involves less
+        table joins.
+
+        Returns a tuple
+        """
+        final = len(join_list)
+        penultimate = last.pop()
+        if penultimate == final:
+            penultimate = last.pop()
+        if trim and len(join_list) > 1:
+            extra = join_list[penultimate:]
+            join_list = join_list[:penultimate]
+            final = penultimate
+            penultimate = last.pop()
+            col = self.alias_map[extra[0]][LHS_JOIN_COL]
+            for alias in extra:
+                self.unref_alias(alias)
+        else:
+            col = target.column
+        alias = join_list[-1]
+        while final > 1:
+            join = self.alias_map[alias]
+            if col != join[RHS_JOIN_COL]:
+                break
+            self.unref_alias(alias)
+            alias = join[LHS_ALIAS]
+            col = join[LHS_JOIN_COL]
+            join_list = join_list[:-1]
+            final -= 1
+            if final == penultimate:
+                penultimate = last.pop()
+        return final, penultimate, col, alias, join_list
+
     def update_dupe_avoidance(self, opts, col, alias):
     def update_dupe_avoidance(self, opts, col, alias):
         """
         """
         For a column that is one of multiple pointing to the same table, update
         For a column that is one of multiple pointing to the same table, update
@@ -1554,6 +1711,7 @@ class BaseQuery(object):
         """
         """
         alias = self.get_initial_alias()
         alias = self.get_initial_alias()
         opts = self.get_meta()
         opts = self.get_meta()
+
         try:
         try:
             for name in field_names:
             for name in field_names:
                 field, target, u2, joins, u3, u4 = self.setup_joins(
                 field, target, u2, joins, u3, u4 = self.setup_joins(
@@ -1574,7 +1732,7 @@ class BaseQuery(object):
         except MultiJoin:
         except MultiJoin:
             raise FieldError("Invalid field name: '%s'" % name)
             raise FieldError("Invalid field name: '%s'" % name)
         except FieldError:
         except FieldError:
-            names = opts.get_all_field_names() + self.extra_select.keys()
+            names = opts.get_all_field_names() + self.extra_select.keys() + self.aggregate_select.keys()
             names.sort()
             names.sort()
             raise FieldError("Cannot resolve keyword %r into field. "
             raise FieldError("Cannot resolve keyword %r into field. "
                     "Choices are: %s" % (name, ", ".join(names)))
                     "Choices are: %s" % (name, ", ".join(names)))
@@ -1609,38 +1767,52 @@ class BaseQuery(object):
         if force_empty:
         if force_empty:
             self.default_ordering = False
             self.default_ordering = False
 
 
+    def set_group_by(self):
+        """
+        Expands the GROUP BY clause required by the query.
+
+        This will usually be the set of all non-aggregate fields in the
+        return data. If the database backend supports grouping by the
+        primary key, and the query would be equivalent, the optimization
+        will be made automatically.
+        """
+        if self.connection.features.allows_group_by_pk:
+            if len(self.select) == len(self.model._meta.fields):
+                self.group_by.append('.'.join([self.model._meta.db_table,
+                                               self.model._meta.pk.column]))
+                return
+
+        for sel in self.select:
+            self.group_by.append(sel)
+
     def add_count_column(self):
     def add_count_column(self):
         """
         """
         Converts the query to do count(...) or count(distinct(pk)) in order to
         Converts the query to do count(...) or count(distinct(pk)) in order to
         get its size.
         get its size.
         """
         """
-        # TODO: When group_by support is added, this needs to be adjusted so
-        # that it doesn't totally overwrite the select list.
         if not self.distinct:
         if not self.distinct:
             if not self.select:
             if not self.select:
-                select = Count()
+                count = self.aggregates_module.Count('*', is_summary=True)
             else:
             else:
                 assert len(self.select) == 1, \
                 assert len(self.select) == 1, \
                         "Cannot add count col with multiple cols in 'select': %r" % self.select
                         "Cannot add count col with multiple cols in 'select': %r" % self.select
-                select = Count(self.select[0])
+                count = self.aggregates_module.Count(self.select[0])
         else:
         else:
             opts = self.model._meta
             opts = self.model._meta
             if not self.select:
             if not self.select:
-                select = Count((self.join((None, opts.db_table, None, None)),
-                        opts.pk.column), True)
+                count = self.aggregates_module.Count((self.join((None, opts.db_table, None, None)), opts.pk.column),
+                                         is_summary=True, distinct=True)
             else:
             else:
                 # Because of SQL portability issues, multi-column, distinct
                 # Because of SQL portability issues, multi-column, distinct
                 # counts need a sub-query -- see get_count() for details.
                 # counts need a sub-query -- see get_count() for details.
                 assert len(self.select) == 1, \
                 assert len(self.select) == 1, \
                         "Cannot add count col with multiple cols in 'select'."
                         "Cannot add count col with multiple cols in 'select'."
-                select = Count(self.select[0], True)
 
 
+                count = self.aggregates_module.Count(self.select[0], distinct=True)
             # Distinct handling is done in Count(), so don't do it at this
             # Distinct handling is done in Count(), so don't do it at this
             # level.
             # level.
             self.distinct = False
             self.distinct = False
-        self.select = [select]
-        self.select_fields = [None]
-        self.extra_select = {}
+        self.aggregate_select = {None: count}
 
 
     def add_select_related(self, fields):
     def add_select_related(self, fields):
         """
         """
@@ -1758,7 +1930,6 @@ class BaseQuery(object):
                 return empty_iter()
                 return empty_iter()
             else:
             else:
                 return
                 return
-
         cursor = self.connection.cursor()
         cursor = self.connection.cursor()
         cursor.execute(sql, params)
         cursor.execute(sql, params)
 
 

+ 20 - 10
django/db/models/sql/subqueries.py

@@ -9,7 +9,7 @@ from django.db.models.sql.query import Query
 from django.db.models.sql.where import AND, Constraint
 from django.db.models.sql.where import AND, Constraint
 
 
 __all__ = ['DeleteQuery', 'UpdateQuery', 'InsertQuery', 'DateQuery',
 __all__ = ['DeleteQuery', 'UpdateQuery', 'InsertQuery', 'DateQuery',
-        'CountQuery']
+        'AggregateQuery']
 
 
 class DeleteQuery(Query):
 class DeleteQuery(Query):
     """
     """
@@ -400,15 +400,25 @@ class DateQuery(Query):
         self.distinct = True
         self.distinct = True
         self.order_by = order == 'ASC' and [1] or [-1]
         self.order_by = order == 'ASC' and [1] or [-1]
 
 
-class CountQuery(Query):
+class AggregateQuery(Query):
     """
     """
-    A CountQuery knows how to take a normal query which would select over
-    multiple distinct columns and turn it into SQL that can be used on a
-    variety of backends (it requires a select in the FROM clause).
+    An AggregateQuery takes another query as a parameter to the FROM
+    clause and only selects the elements in the provided list.
     """
     """
-    def get_from_clause(self):
-        result, params = self._query.as_sql()
-        return ['(%s) A1' % result], params
+    def add_subquery(self, query):
+        self.subquery, self.sub_params = query.as_sql(with_col_aliases=True)
 
 
-    def get_ordering(self):
-        return ()
+    def as_sql(self, quote_func=None):
+        """
+        Creates the SQL for this query. Returns the SQL string and list of
+        parameters.
+        """
+        sql = ('SELECT %s FROM (%s) subquery' % (
+            ', '.join([
+                aggregate.as_sql()
+                for aggregate in self.aggregate_select.values()
+            ]),
+            self.subquery)
+        )
+        params = self.sub_params
+        return (sql, params)

+ 15 - 10
django/test/testcases.py

@@ -14,6 +14,7 @@ from django.test.client import Client
 from django.utils import simplejson
 from django.utils import simplejson
 
 
 normalize_long_ints = lambda s: re.sub(r'(?<![\w])(\d+)L(?![\w])', '\\1', s)
 normalize_long_ints = lambda s: re.sub(r'(?<![\w])(\d+)L(?![\w])', '\\1', s)
+normalize_decimals = lambda s: re.sub(r"Decimal\('(\d+(\.\d*)?)'\)", lambda m: "Decimal(\"%s\")" % m.groups()[0], s)
 
 
 def to_list(value):
 def to_list(value):
     """
     """
@@ -31,7 +32,7 @@ class OutputChecker(doctest.OutputChecker):
     def check_output(self, want, got, optionflags):
     def check_output(self, want, got, optionflags):
         "The entry method for doctest output checking. Defers to a sequence of child checkers"
         "The entry method for doctest output checking. Defers to a sequence of child checkers"
         checks = (self.check_output_default,
         checks = (self.check_output_default,
-                  self.check_output_long,
+                  self.check_output_numeric,
                   self.check_output_xml,
                   self.check_output_xml,
                   self.check_output_json)
                   self.check_output_json)
         for check in checks:
         for check in checks:
@@ -43,19 +44,23 @@ class OutputChecker(doctest.OutputChecker):
         "The default comparator provided by doctest - not perfect, but good for most purposes"
         "The default comparator provided by doctest - not perfect, but good for most purposes"
         return doctest.OutputChecker.check_output(self, want, got, optionflags)
         return doctest.OutputChecker.check_output(self, want, got, optionflags)
 
 
-    def check_output_long(self, want, got, optionflags):
-        """Doctest does an exact string comparison of output, which means long
-        integers aren't equal to normal integers ("22L" vs. "22"). The
-        following code normalizes long integers so that they equal normal
-        integers.
+    def check_output_numeric(self, want, got, optionflags):
+        """Doctest does an exact string comparison of output, which means that
+        some numerically equivalent values aren't equal. This check normalizes
+         * long integers (22L) so that they equal normal integers. (22)
+         * Decimals so that they are comparable, regardless of the change
+           made to __repr__ in Python 2.6.
         """
         """
-        return normalize_long_ints(want) == normalize_long_ints(got)
+        return doctest.OutputChecker.check_output(self,
+            normalize_decimals(normalize_long_ints(want)),
+            normalize_decimals(normalize_long_ints(got)),
+            optionflags)
 
 
     def check_output_xml(self, want, got, optionsflags):
     def check_output_xml(self, want, got, optionsflags):
         """Tries to do a 'xml-comparision' of want and got.  Plain string
         """Tries to do a 'xml-comparision' of want and got.  Plain string
         comparision doesn't always work because, for example, attribute
         comparision doesn't always work because, for example, attribute
         ordering should not be important.
         ordering should not be important.
-        
+
         Based on http://codespeak.net/svn/lxml/trunk/src/lxml/doctestcompare.py
         Based on http://codespeak.net/svn/lxml/trunk/src/lxml/doctestcompare.py
         """
         """
         _norm_whitespace_re = re.compile(r'[ \t\n][ \t\n]+')
         _norm_whitespace_re = re.compile(r'[ \t\n][ \t\n]+')
@@ -102,7 +107,7 @@ class OutputChecker(doctest.OutputChecker):
             wrapper = '<root>%s</root>'
             wrapper = '<root>%s</root>'
             want = wrapper % want
             want = wrapper % want
             got = wrapper % got
             got = wrapper % got
-            
+
         # Parse the want and got strings, and compare the parsings.
         # Parse the want and got strings, and compare the parsings.
         try:
         try:
             want_root = parseString(want).firstChild
             want_root = parseString(want).firstChild
@@ -174,7 +179,7 @@ class TestCase(unittest.TestCase):
         """Performs any pre-test setup. This includes:
         """Performs any pre-test setup. This includes:
 
 
             * Flushing the database.
             * Flushing the database.
-            * If the Test Case class has a 'fixtures' member, installing the 
+            * If the Test Case class has a 'fixtures' member, installing the
               named fixtures.
               named fixtures.
             * If the Test Case class has a 'urls' member, replace the
             * If the Test Case class has a 'urls' member, replace the
               ROOT_URLCONF with it.
               ROOT_URLCONF with it.

+ 1 - 1
docs/index.txt

@@ -42,7 +42,7 @@ The model layer
     * **Models:** :ref:`Model syntax <topics-db-models>` | :ref:`Field types <ref-models-fields>` | :ref:`Meta options <ref-models-options>`
     * **Models:** :ref:`Model syntax <topics-db-models>` | :ref:`Field types <ref-models-fields>` | :ref:`Meta options <ref-models-options>`
     * **QuerySets:** :ref:`Executing queries <topics-db-queries>` | :ref:`QuerySet method reference <ref-models-querysets>`
     * **QuerySets:** :ref:`Executing queries <topics-db-queries>` | :ref:`QuerySet method reference <ref-models-querysets>`
     * **Model instances:** :ref:`Instance methods <ref-models-instances>` | :ref:`Accessing related objects <ref-models-relations>`
     * **Model instances:** :ref:`Instance methods <ref-models-instances>` | :ref:`Accessing related objects <ref-models-relations>`
-    * **Advanced:** :ref:`Managers <topics-db-managers>` | :ref:`Raw SQL <topics-db-sql>` | :ref:`Transactions <topics-db-transactions>` | :ref:`Custom fields <howto-custom-model-fields>`
+    * **Advanced:** :ref:`Managers <topics-db-managers>` | :ref:`Raw SQL <topics-db-sql>` | :ref:`Transactions <topics-db-transactions>` | :ref:`Aggregation <topics-db-aggregation>` | :ref:`Custom fields <howto-custom-model-fields>`
     * **Other:** :ref:`Supported databases <ref-databases>` | :ref:`Legacy databases <howto-legacy-databases>` | :ref:`Providing initial data <howto-initial-data>`
     * **Other:** :ref:`Supported databases <ref-databases>` | :ref:`Legacy databases <howto-legacy-databases>` | :ref:`Providing initial data <howto-initial-data>`
 
 
 The template layer
 The template layer

+ 1 - 1
docs/ref/models/index.txt

@@ -7,7 +7,7 @@ Model API reference. For introductory material, see :ref:`topics-db-models`.
 
 
 .. toctree::
 .. toctree::
    :maxdepth: 1
    :maxdepth: 1
-   
+
    fields
    fields
    relations
    relations
    options
    options

+ 186 - 0
docs/ref/models/querysets.txt

@@ -158,6 +158,48 @@ In SQL terms, that evaluates to::
 
 
 Note the second example is more restrictive.
 Note the second example is more restrictive.
 
 
+``annotate(*args, **kwargs)``
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. versionadded:: 1.1
+
+Annotates each object in the ``QuerySet`` with the provided list of
+aggregate values (averages, sums, etc) that have been computed over
+the objects that are related to the objects in the ``QuerySet``.
+Each argument to ``annotate()`` is an annotation that will be added
+to each object in the ``QuerySet`` that is returned.
+
+The aggregation functions that are provided by Django are described
+in `Aggregation Functions`_ below.
+
+Annotations specified using keyword arguments will use the keyword as
+the alias for the annotation. Anonymous arguments will have an alias
+generated for them based upon the name of the aggregate function and
+the model field that is being aggregated.
+
+For example, if you were manipulating a list of blogs, you may want
+to determine how many entries have been made in each blog::
+
+    >>> q = Blog.objects.annotate(Count('entry'))
+    # The name of the first blog
+    >>> q[0].name
+    'Blogasaurus'
+    # The number of entries on the first blog
+    >>> q[0].entry__count
+    42
+
+The ``Blog`` model doesn't define an ``entry__count`` attribute by itself,
+but by using a keyword argument to specify the aggregate function, you can
+control the name of the annotation::
+
+    >>> q = Blog.objects.annotate(number_of_entries=Count('entry'))
+    # The number of entries on the first blog, using the name provided
+    >>> q[0].number_of_entries
+    42
+
+For an in-depth discussion of aggregation, see :ref:`the topic guide on
+Aggregation <topics-db-aggregation>`.
+
 ``order_by(*fields)``
 ``order_by(*fields)``
 ~~~~~~~~~~~~~~~~~~~~~
 ~~~~~~~~~~~~~~~~~~~~~
 
 
@@ -931,6 +973,38 @@ exist with the given parameters.
 
 
 Note ``latest()`` exists purely for convenience and readability.
 Note ``latest()`` exists purely for convenience and readability.
 
 
+``aggregate(*args, **kwargs)``
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. versionadded:: 1.1
+
+Returns a dictionary of aggregate values (averages, sums, etc) calculated
+over the ``QuerySet``. Each argument to ``aggregate()`` specifies
+a value that will be included in the dictionary that is returned.
+
+The aggregation functions that are provided by Django are described
+in `Aggregation Functions`_ below.
+
+Aggregates specified using keyword arguments will use the keyword as
+the name for the annotation. Anonymous arguments will have a name
+generated for them based upon the name of the aggregate function and
+the model field that is being aggregated.
+
+For example, if you were manipulating blog entries, you may want to know
+the average number of authors contributing to blog entries::
+
+    >>> Blog.objects.aggregate(Count('entry'))
+    {'entry__count': 16}
+
+By using a keyword argument to specify the aggregate function, you can
+control the name of the aggregation value that is returned::
+
+    >>> Blog.objects.aggregate(number_of_entries=Count('entry'))
+    {'number_of_entries': 16}
+
+For an in-depth discussion of aggregation, see :ref:`the topic guide on
+Aggregation <topics-db-aggregation>`.
+
 .. _field-lookups:
 .. _field-lookups:
 
 
 Field lookups
 Field lookups
@@ -1326,3 +1400,115 @@ SQL equivalents::
 
 
     SELECT ... WHERE title REGEXP '(?i)^(an?|the) +'; -- SQLite
     SELECT ... WHERE title REGEXP '(?i)^(an?|the) +'; -- SQLite
 
 
+.. _aggregation-functions:
+
+Aggregation Functions
+---------------------
+
+.. versionadded:: 1.1
+
+Django provides the following aggregation functions in the
+``django.db.models`` module.
+
+``Avg``
+~~~~~~~
+
+.. class:: Avg(field)
+
+Returns the mean value of the given field.
+
+    * Default alias: ``<field>__avg``
+    * Return type: float
+
+``Count``
+~~~~~~~~~
+
+.. class:: Count(field, distinct=False)
+
+Returns the number of objects that are related through the provided field.
+
+    * Default alias: ``<field>__count``
+    * Return type: integer
+
+Has one optional argument:
+
+.. attribute:: distinct
+
+    If distinct=True, the count will only include unique instances. This has
+    the SQL equivalent of ``COUNT(DISTINCT field)``. Default value is ``False``.
+
+``Max``
+~~~~~~~
+
+.. class:: Max(field)
+
+Returns the maximum value of the given field.
+
+    * Default alias: ``<field>__max``
+    * Return type: same as input field
+
+``Min``
+~~~~~~~
+
+.. class:: Min(field)
+
+Returns the minimum value of the given field.
+
+    * Default alias: ``<field>__min``
+    * Return type: same as input field
+
+``StdDev``
+~~~~~~~~~~
+
+.. class:: StdDev(field, sample=False)
+
+Returns the standard deviation of the data in the provided field.
+
+    * Default alias: ``<field>__stddev``
+    * Return type: float
+
+Has one optional argument:
+
+.. attribute:: sample
+
+    By default, ``StdDev`` returns the population standard deviation. However,
+    if ``sample=True``, the return value will be the sample standard deviation.
+
+.. admonition:: SQLite
+
+    SQLite doesn't provide ``StdDev`` out of the box. An implementation is
+    available as an extension module for SQLite. Consult the SQLite
+    documentation for instructions on obtaining and installing this extension.
+
+``Sum``
+~~~~~~~
+
+.. class:: Sum(field)
+
+Computes the sum of all values of the given field.
+
+    * Default alias: ``<field>__sum``
+    * Return type: same as input field
+
+``Variance``
+~~~~~~~~~
+
+.. class:: Variance(field, sample=False)
+
+Returns the variance of the data in the provided field.
+
+    * Default alias: ``<field>__variance``
+    * Return type: float
+
+Has one optional argument:
+
+.. attribute:: sample
+
+    By default, ``Variance`` returns the population variance. However,
+    if ``sample=True``, the return value will be the sample variance.
+
+.. admonition:: SQLite
+
+    SQLite doesn't provide ``Variance`` out of the box. An implementation is
+    available as an extension module for SQLite. Consult the SQLite
+    documentation for instructions on obtaining and installing this extension.

+ 323 - 0
docs/topics/db/aggregation.txt

@@ -0,0 +1,323 @@
+.. _topics-db-aggregation:
+
+=============
+ Aggregation
+=============
+
+.. versionadded:: 1.1
+
+.. currentmodule:: django.db.models
+
+The topic guide on :ref:`Django's database-abstraction API <topics-db-queries>`
+described the way that you can use Django queries that create,
+retrieve, update and delete individual objects. However, sometimes you will
+need to retrieve values that are derived by summarizing or *aggregating* a
+collection of objects. This topic guide describes the ways that aggregate values
+can be generated and returned using Django queries.
+
+Throughout this guide, we'll refer to the following models. These models are
+used to track the inventory for a series of online bookstores:
+
+.. _queryset-model-example:
+
+.. code-block:: python
+
+    class Author(models.Model):
+       name = models.CharField(max_length=100)
+       age = models.IntegerField()
+       friends = models.ManyToManyField('self', blank=True)
+
+    class Publisher(models.Model):
+       name = models.CharField(max_length=300)
+       num_awards = models.IntegerField()
+
+    class Book(models.Model):
+       isbn = models.CharField(max_length=9)
+       name = models.CharField(max_length=300)
+       pages = models.IntegerField()
+       price = models.DecimalField(max_digits=10, decimal_places=2)
+       rating = models.FloatField()
+       authors = models.ManyToManyField(Author)
+       publisher = models.ForeignKey(Publisher)
+       pubdate = models.DateField()
+
+    class Store(models.Model):
+       name = models.CharField(max_length=300)
+       books = models.ManyToManyField(Book)
+
+
+Generating aggregates over a QuerySet
+=====================================
+
+Django provides two ways to generate aggregates. The first way is to generate
+summary values over an entire ``QuerySet``. For example, say you wanted to
+calculate the average price of all books available for sale. Django's query
+syntax provides a means for describing the set of all books::
+
+    >>> Book.objects.all()
+
+What we need is a way to calculate summary values over the objects that
+belong to this ``QuerySet``. This is done by appending an ``aggregate()``
+clause onto the ``QuerySet``::
+
+    >>> from django.db.models import Avg
+    >>> Book.objects.all().aggregate(Avg('price'))
+    {'price__avg': 34.35}
+
+The ``all()`` is redundant in this example, so this could be simplified to::
+
+    >>> Book.objects.aggregate(Avg('price'))
+    {'price__avg': 34.35}
+
+The argument to the ``aggregate()`` clause describes the aggregate value that
+we want to compute - in this case, the average of the ``price`` field on the
+``Book`` model. A list of the aggregate functions that are available can be
+found in the :ref:`QuerySet reference <aggregation-functions>`.
+
+``aggregate()`` is a terminal clause for a ``QuerySet`` that, when invoked,
+returns a dictionary of name-value pairs. The name is an identifier for the
+aggregate value; the value is the computed aggregate. The name is
+automatically generated from the name of the field and the aggregate function.
+If you want to manually specify a name for the aggregate value, you can do so
+by providing that name when you specify the aggregate clause::
+
+    >>> Book.objects.aggregate(average_price=Avg('price'))
+    {'average_price': 34.35}
+
+If you want to generate more than one aggregate, you just add another
+argument to the ``aggregate()`` clause. So, if we also wanted to know
+the maximum and minimum price of all books, we would issue the query::
+
+    >>> Book.objects.aggregate(Avg('price'), Max('price'), Min('price'))
+    {'price__avg': 34.35, 'price__max': Decimal('81.20'), 'price__min': Decimal('12.99')}
+
+Generating aggregates for each item in a QuerySet
+=================================================
+
+The second way to generate summary values is to generate an independent
+summary for each object in a ``QuerySet``. For example, if you are retrieving
+a list of books, you may want to know how many authors contributed to
+each book. Each Book has a many-to-many relationship with the Author; we
+want to summarize this relationship for each book in the ``QuerySet``.
+
+Per-object summaries can be generated using the ``annotate()`` clause.
+When an ``annotate()`` clause is specified, each object in the ``QuerySet``
+will be annotated with the specified values.
+
+The syntax for these annotations is identical to that used for the
+``aggregate()`` clause. Each argument to ``annotate()`` describes an
+aggregate that is to be calculated. For example, to annotate Books with
+the number of authors::
+
+    # Build an annotated queryset
+    >>> q = Book.objects.annotate(Count('authors'))
+    # Interrogate the first object in the queryset
+    >>> q[0]
+    <Book: The Definitive Guide to Django>
+    >>> q[0].authors__count
+    2
+    # Interrogate the second object in the queryset
+    >>> q[1]
+    <Book: Practical Django Projects>
+    >>> q[1].authors__count
+    1
+
+As with ``aggregate()``, the name for the annotation is automatically derived
+from the name of the aggregate function and the name of the field being
+aggregated. You can override this default name by providing an alias when you
+specify the annotation::
+
+    >>> q = Book.objects.annotate(num_authors=Count('authors'))
+    >>> q[0].num_authors
+    2
+    >>> q[1].num_authors
+    1
+
+Unlike ``aggregate()``, ``annotate()`` is *not* a terminal clause. The output
+of the ``annotate()`` clause is a ``QuerySet``; this ``QuerySet`` can be
+modified using any other ``QuerySet`` operation, including ``filter()``,
+``order_by()``, or even additional calls to ``annotate()``.
+
+Joins and aggregates
+====================
+
+So far, we have dealt with aggregates over fields that belong to the
+model being queried. However, sometimes the value you want to aggregate
+will belong to a model that is related to the model you are querying.
+
+When specifying the field to be aggregated in an aggregate function,
+Django will allow you to use the same
+:ref:`double underscore notation <field-lookups-intro>` that is used
+when referring to related fields in filters. Django will then handle
+any table joins that are required to retrieve and aggregate the
+related value.
+
+For example, to find the price range of books offered in each store,
+you could use the annotation::
+
+    >>> Store.objects.annotate(min_price=Min('books__price'), max_price=Max('books__price'))
+
+This tells Django to retrieve the Store model, join (through the
+many-to-many relationship) with the Book model, and aggregate on the
+price field of the book model to produce a minimum and maximum value.
+
+The same rules apply to the ``aggregate()`` clause. If you wanted to
+know the lowest and highest price of any book that is available for sale
+in a store, you could use the aggregate::
+
+    >>> Store.objects.aggregate(min_price=Min('books__price'), max_price=Max('books__price'))
+
+Join chains can be as deep as you require. For example, to extract the
+age of the youngest author of any book available for sale, you could
+issue the query::
+
+    >>> Store.objects.aggregate(youngest_age=Min('books__authors__age'))
+
+Aggregations and other QuerySet clauses
+=======================================
+
+``filter()`` and ``exclude()``
+------------------------------
+
+Aggregates can also participate in filters. Any ``filter()`` (or
+``exclude()``) applied to normal model fields will have the effect of
+constraining the objects that are considered for aggregation.
+
+When used with an ``annotate()`` clause, a filter has the effect of
+constraining the objects for which an annotation is calculated. For example,
+you can generate an annotated list of all books that have a title starting
+with "Django" using the query::
+
+    >>> Book.objects.filter(name__startswith="Django").annotate(num_authors=Count('authors'))
+
+When used with an ``aggregate()`` clause, a filter has the effect of
+constraining the objects over which the aggregate is calculated.
+For example, you can generate the average price of all books with a
+title that starts with "Django" using the query::
+
+    >>> Book.objects.filter(name__startswith="Django").aggregate(Avg('price'))
+
+Filtering on annotations
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+Annotated values can also be filtered. The alias for the annotation can be
+used in ``filter()`` and ``exclude()`` clauses in the same way as any other
+model field.
+
+For example, to generate a list of books that have more than one author,
+you can issue the query::
+
+    >>> Book.objects.annotate(num_authors=Count('authors')).filter(num_authors__gt=1)
+
+This query generates an annotated result set, and then generates a filter
+based upon that annotation.
+
+Order of ``annotate()`` and ``filter()`` clauses
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+When developing a complex query that involves both ``annotate()`` and
+``filter()`` clauses, particular attention should be paid to the order
+in which the clauses are applied to the ``QuerySet``.
+
+When an ``annotate()`` clause is applied to a query, the annotation is
+computed over the state of the query up to the point where the annotation
+is requested. The practical implication of this is that ``filter()`` and
+``annotate()`` are not commutative operations -- that is, there is a
+difference between the query::
+
+    >>> Publisher.objects.annotate(num_books=Count('book')).filter(book__rating__gt=3.0)
+
+and the query::
+
+    >>> Publisher.objects.filter(book__rating__gt=3.0).annotate(num_books=Count('book'))
+
+Both queries will return a list of Publishers that have at least one good
+book (i.e., a book with a rating exceeding 3.0). However, the annotation in
+the first query will provide the total number of all books published by the
+publisher; the second query will only include good books in the annotated
+count. In the first query, the annotation precedes the filter, so the
+filter has no effect on the annotation. In the second query, the filter
+precedes the annotation, and as a result, the filter constrains the objects
+considered when calculating the annotation.
+
+``order_by()``
+--------------
+
+Annotations can be used as a basis for ordering. When you
+define an ``order_by()`` clause, the aggregates you provide can reference
+any alias defined as part of an ``annotate()`` clause in the query.
+
+For example, to order a ``QuerySet`` of books by the number of authors
+that have contributed to the book, you could use the following query::
+
+    >>> Book.objects.annotate(num_authors=Count('authors')).order_by('num_authors')
+
+``values()``
+------------
+
+Ordinarily, annotations are generated on a per-object basis - an annotated
+``QuerySet`` will return one result for each object in the original
+``Queryset``. However, when a ``values()`` clause is used to constrain the
+columns that are returned in the result set, the method for evaluating
+annotations is slightly different. Instead of returning an annotated result
+for each result in the original ``QuerySet``, the original results are
+grouped according to the unique combinations of the fields specified in the
+``values()`` clause. An annotation is then provided for each unique group;
+the annotation is computed over all members of the group.
+
+For example, consider an author query that attempts to find out the average
+rating of books written by each author::
+
+    >>> Author.objects.annotate(average_rating=Avg('book__rating'))
+
+This will return one result for each author in the database, annotated with
+their average book rating.
+
+However, the result will be slightly different if you use a ``values()`` clause::
+
+    >>> Author.objects.values('name').annotate(average_rating=Avg('book__rating'))
+
+In this example, the authors will be grouped by name, so you will only get
+an annotated result for each *unique* author name. This means if you have
+two authors with the same name, their results will be merged into a single
+result in the output of the query; the average will be computed as the
+average over the books written by both authors.
+
+The annotation name will be added to the fields returned
+as part of the ``ValuesQuerySet``.
+
+Order of ``annotate()`` and ``values()`` clauses
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+As with the ``filter()`` clause, the order in which ``annotate()`` and
+``values()`` clauses are applied to a query is significant. If the
+``values()`` clause precedes the ``annotate()``, the annotation will be
+computed using the grouping described by the ``values()`` clause.
+
+However, if the ``annotate()`` clause precedes the ``values()`` clause,
+the annotations will be generated over the entire query set. In this case,
+the ``values()`` clause only constrains the fields that are generated on
+output.
+
+For example, if we reverse the order of the ``values()`` and ``annotate()``
+clause from our previous example::
+
+    >>> Author.objects.annotate(average_rating=Avg('book__rating')).values('name')
+
+This will now yield one unique result for each author; however, only
+the author's name and the ``average_rating`` annotation will be returned
+in the output data.
+
+Aggregating annotations
+-----------------------
+
+You can also generate an aggregate on the result of an annotation. When you
+define an ``aggregate()`` clause, the aggregates you provide can reference
+any alias defined as part of an ``annotate()`` clause in the query.
+
+For example, if you wanted to calculate the average number of authors per
+book you first annotate the set of books with the author count, then
+aggregate that author count, referencing the annotation field::
+
+    >>> Book.objects.annotate(num_authors=Count('authors')).aggregate(Avg('num_authors'))
+    {'num_authors__avg': 1.66}

+ 1 - 0
docs/topics/db/index.txt

@@ -12,6 +12,7 @@ model maps to a single database table.
 
 
    models
    models
    queries
    queries
+   aggregation
    managers
    managers
    sql
    sql
    transactions
    transactions

+ 0 - 0
tests/modeltests/aggregation/__init__.py


+ 229 - 0
tests/modeltests/aggregation/fixtures/initial_data.json

@@ -0,0 +1,229 @@
+[
+    {
+        "pk": 1,
+        "model": "aggregation.publisher",
+        "fields": {
+            "name": "Apress",
+            "num_awards": 3
+        }
+    },
+    {
+        "pk": 2,
+        "model": "aggregation.publisher",
+        "fields": {
+            "name": "Sams",
+            "num_awards": 1
+        }
+    },
+    {
+        "pk": 3,
+        "model": "aggregation.publisher",
+        "fields": {
+            "name": "Prentice Hall",
+            "num_awards": 7
+        }
+    },
+    {
+        "pk": 4,
+        "model": "aggregation.publisher",
+        "fields": {
+            "name": "Morgan Kaufmann",
+            "num_awards": 9
+        }
+    },
+    {
+        "pk": 1,
+        "model": "aggregation.book",
+        "fields": {
+            "publisher": 1,
+            "isbn": "159059725",
+            "name": "The Definitive Guide to Django: Web Development Done Right",
+            "price": "30.00",
+            "rating": 4.5,
+            "authors": [1, 2],
+            "pages": 447,
+            "pubdate": "2007-12-6"
+        }
+    },
+    {
+        "pk": 2,
+        "model": "aggregation.book",
+        "fields": {
+            "publisher": 2,
+            "isbn": "067232959",
+            "name": "Sams Teach Yourself Django in 24 Hours",
+            "price": "23.09",
+            "rating": 3.0,
+            "authors": [3],
+            "pages": 528,
+            "pubdate": "2008-3-3"
+        }
+    },
+    {
+        "pk": 3,
+        "model": "aggregation.book",
+        "fields": {
+            "publisher": 1,
+            "isbn": "159059996",
+            "name": "Practical Django Projects",
+            "price": "29.69",
+            "rating": 4.0,
+            "authors": [4],
+            "pages": 300,
+            "pubdate": "2008-6-23"
+        }
+    },
+    {
+        "pk": 4,
+        "model": "aggregation.book",
+        "fields": {
+            "publisher": 3,
+            "isbn": "013235613",
+            "name": "Python Web Development with Django",
+            "price": "29.69",
+            "rating": 4.0,
+            "authors": [5, 6, 7],
+            "pages": 350,
+            "pubdate": "2008-11-3"
+        }
+    },
+    {
+        "pk": 5,
+        "model": "aggregation.book",
+        "fields": {
+            "publisher": 3,
+            "isbn": "013790395",
+            "name": "Artificial Intelligence: A Modern Approach",
+            "price": "82.80",
+            "rating": 4.0,
+            "authors": [8, 9],
+            "pages": 1132,
+            "pubdate": "1995-1-15"
+        }
+    },
+    {
+        "pk": 6,
+        "model": "aggregation.book",
+        "fields": {
+            "publisher": 4,
+            "isbn": "155860191",
+            "name": "Paradigms of Artificial Intelligence Programming: Case Studies in Common Lisp",
+            "price": "75.00",
+            "rating": 5.0,
+            "authors": [8],
+            "pages": 946,
+            "pubdate": "1991-10-15"
+        }
+    },
+    {
+        "pk": 1,
+        "model": "aggregation.store",
+        "fields": {
+            "books": [1, 2, 3, 4, 5, 6],
+            "name": "Amazon.com",
+            "original_opening": "1994-4-23 9:17:42",
+            "friday_night_closing": "23:59:59"
+        }
+    },
+    {
+        "pk": 2,
+        "model": "aggregation.store",
+        "fields": {
+            "books": [1, 3, 5, 6],
+            "name": "Books.com",
+            "original_opening": "2001-3-15 11:23:37",
+            "friday_night_closing": "23:59:59"
+        }
+    },
+    {
+        "pk": 3,
+        "model": "aggregation.store",
+        "fields": {
+            "books": [3, 4, 6],
+            "name": "Mamma and Pappa's Books",
+            "original_opening": "1945-4-25 16:24:14",
+            "friday_night_closing": "21:30:00"
+        }
+    },
+    {
+        "pk": 1,
+        "model": "aggregation.author",
+        "fields": {
+            "age": 34,
+            "friends": [2, 4],
+            "name": "Adrian Holovaty"
+        }
+    },
+    {
+        "pk": 2,
+        "model": "aggregation.author",
+        "fields": {
+            "age": 35,
+            "friends": [1, 7],
+            "name": "Jacob Kaplan-Moss"
+        }
+    },
+    {
+        "pk": 3,
+        "model": "aggregation.author",
+        "fields": {
+            "age": 45,
+            "friends": [],
+            "name": "Brad Dayley"
+        }
+    },
+    {
+        "pk": 4,
+        "model": "aggregation.author",
+        "fields": {
+            "age": 29,
+            "friends": [1],
+            "name": "James Bennett"
+        }
+    },
+    {
+        "pk": 5,
+        "model": "aggregation.author",
+        "fields": {
+            "age": 37,
+            "friends": [6, 7],
+            "name": "Jeffrey Forcier "
+        }
+    },
+    {
+        "pk": 6,
+        "model": "aggregation.author",
+        "fields": {
+            "age": 29,
+            "friends": [5, 7],
+            "name": "Paul Bissex"
+        }
+    },
+    {
+        "pk": 7,
+        "model": "aggregation.author",
+        "fields": {
+            "age": 25,
+            "friends": [2, 5, 6],
+            "name": "Wesley J. Chun"
+        }
+    },
+    {
+        "pk": 8,
+        "model": "aggregation.author",
+        "fields": {
+            "age": 57,
+            "friends": [9],
+            "name": "Peter Norvig"
+        }
+    },
+    {
+        "pk": 9,
+        "model": "aggregation.author",
+        "fields": {
+            "age": 46,
+            "friends": [8],
+            "name": "Stuart Russell"
+        }
+    }
+]

+ 379 - 0
tests/modeltests/aggregation/models.py

@@ -0,0 +1,379 @@
+# coding: utf-8
+from django.db import models
+
+try:
+    sorted
+except NameError:
+    from django.utils.itercompat import sorted      # For Python 2.3
+
+class Author(models.Model):
+   name = models.CharField(max_length=100)
+   age = models.IntegerField()
+   friends = models.ManyToManyField('self', blank=True)
+
+   def __unicode__(self):
+      return self.name
+
+class Publisher(models.Model):
+   name = models.CharField(max_length=300)
+   num_awards = models.IntegerField()
+
+   def __unicode__(self):
+      return self.name
+
+class Book(models.Model):
+   isbn = models.CharField(max_length=9)
+   name = models.CharField(max_length=300)
+   pages = models.IntegerField()
+   rating = models.FloatField()
+   price = models.DecimalField(decimal_places=2, max_digits=6)
+   authors = models.ManyToManyField(Author)
+   publisher = models.ForeignKey(Publisher)
+   pubdate = models.DateField()
+
+   def __unicode__(self):
+      return self.name
+
+class Store(models.Model):
+   name = models.CharField(max_length=300)
+   books = models.ManyToManyField(Book)
+   original_opening = models.DateTimeField()
+   friday_night_closing = models.TimeField()
+
+   def __unicode__(self):
+      return self.name
+
+class Entries(models.Model):
+   EntryID = models.AutoField(primary_key=True, db_column='Entry ID')
+   Entry = models.CharField(unique=True, max_length=50)
+   Exclude = models.BooleanField()
+
+class Clues(models.Model):
+   ID = models.AutoField(primary_key=True)
+   EntryID = models.ForeignKey(Entries, verbose_name='Entry', db_column = 'Entry ID')
+   Clue = models.CharField(max_length=150)
+
+# Tests on 'aggregate'
+# Different backends and numbers.
+__test__ = {'API_TESTS': """
+>>> from django.core import management
+>>> try:
+...     from decimal import Decimal
+... except:
+...     from django.utils._decimal import Decimal
+>>> from datetime import date
+
+# Reset the database representation of this app.
+# This will return the database to a clean initial state.
+>>> management.call_command('flush', verbosity=0, interactive=False)
+
+# Empty Call - request nothing, get nothing.
+>>> Author.objects.all().aggregate()
+{}
+
+>>> from django.db.models import Avg, Sum, Count, Max, Min
+
+# Single model aggregation
+#
+
+# Single aggregate
+# Average age of Authors
+>>> Author.objects.all().aggregate(Avg('age'))
+{'age__avg': 37.4...}
+
+# Multiple aggregates
+# Average and Sum of Author ages
+>>> Author.objects.all().aggregate(Sum('age'), Avg('age'))
+{'age__sum': 337, 'age__avg': 37.4...}
+
+# Aggregates interact with filters, and only
+# generate aggregate values for the filtered values
+# Sum of the age of those older than 29 years old
+>>> Author.objects.all().filter(age__gt=29).aggregate(Sum('age'))
+{'age__sum': 254}
+
+# Depth-1 Joins
+#
+
+# On Relationships with self
+# Average age of the friends of each author
+>>> Author.objects.all().aggregate(Avg('friends__age'))
+{'friends__age__avg': 34.07...}
+
+# On ManyToMany Relationships
+#
+
+# Forward
+# Average age of the Authors of Books with a rating of less than 4.5
+>>> Book.objects.all().filter(rating__lt=4.5).aggregate(Avg('authors__age'))
+{'authors__age__avg': 38.2...}
+
+# Backward
+# Average rating of the Books whose Author's name contains the letter 'a'
+>>> Author.objects.all().filter(name__contains='a').aggregate(Avg('book__rating'))
+{'book__rating__avg': 4.0}
+
+# On OneToMany Relationships
+#
+
+# Forward
+# Sum of the number of awards of each Book's Publisher
+>>> Book.objects.all().aggregate(Sum('publisher__num_awards'))
+{'publisher__num_awards__sum': 30}
+
+# Backward
+# Sum of the price of every Book that has a Publisher
+>>> Publisher.objects.all().aggregate(Sum('book__price'))
+{'book__price__sum': Decimal("270.27")}
+
+# Multiple Joins
+#
+
+# Forward
+>>> Store.objects.all().aggregate(Max('books__authors__age'))
+{'books__authors__age__max': 57}
+
+# Backward
+# Note that the very long default alias may be truncated
+>>> Author.objects.all().aggregate(Min('book__publisher__num_awards'))
+{'book__publisher__num_award...': 1}
+
+# Aggregate outputs can also be aliased.
+
+# Average amazon.com Book rating
+>>> Store.objects.filter(name='Amazon.com').aggregate(amazon_mean=Avg('books__rating'))
+{'amazon_mean': 4.08...}
+
+# Tests on annotate()
+
+# An empty annotate call does nothing but return the same QuerySet
+>>> Book.objects.all().annotate().order_by('pk')
+[<Book: The Definitive Guide to Django: Web Development Done Right>, <Book: Sams Teach Yourself Django in 24 Hours>, <Book: Practical Django Projects>, <Book: Python Web Development with Django>, <Book: Artificial Intelligence: A Modern Approach>, <Book: Paradigms of Artificial Intelligence Programming: Case Studies in Common Lisp>]
+
+# Annotate inserts the alias into the model object with the aggregated result
+>>> books = Book.objects.all().annotate(mean_age=Avg('authors__age'))
+>>> books.get(pk=1).name
+u'The Definitive Guide to Django: Web Development Done Right'
+
+>>> books.get(pk=1).mean_age
+34.5
+
+# On ManyToMany Relationships
+
+# Forward
+# Average age of the Authors of each book with a rating less than 4.5
+>>> books = Book.objects.all().filter(rating__lt=4.5).annotate(Avg('authors__age'))
+>>> sorted([(b.name, b.authors__age__avg) for b in books])
+[(u'Artificial Intelligence: A Modern Approach', 51.5), (u'Practical Django Projects', 29.0), (u'Python Web Development with Django', 30.3...), (u'Sams Teach Yourself Django in 24 Hours', 45.0)]
+
+# Count the number of authors of each book
+>>> books = Book.objects.annotate(num_authors=Count('authors'))
+>>> sorted([(b.name, b.num_authors) for b in books])
+[(u'Artificial Intelligence: A Modern Approach', 2), (u'Paradigms of Artificial Intelligence Programming: Case Studies in Common Lisp', 1), (u'Practical Django Projects', 1), (u'Python Web Development with Django', 3), (u'Sams Teach Yourself Django in 24 Hours', 1), (u'The Definitive Guide to Django: Web Development Done Right', 2)]
+
+# Backward
+# Average rating of the Books whose Author's names contains the letter 'a'
+>>> authors = Author.objects.all().filter(name__contains='a').annotate(Avg('book__rating'))
+>>> sorted([(a.name, a.book__rating__avg) for a in authors])
+[(u'Adrian Holovaty', 4.5), (u'Brad Dayley', 3.0), (u'Jacob Kaplan-Moss', 4.5), (u'James Bennett', 4.0), (u'Paul Bissex', 4.0), (u'Stuart Russell', 4.0)]
+
+# Count the number of books written by each author
+>>> authors = Author.objects.annotate(num_books=Count('book'))
+>>> sorted([(a.name, a.num_books) for a in authors])
+[(u'Adrian Holovaty', 1), (u'Brad Dayley', 1), (u'Jacob Kaplan-Moss', 1), (u'James Bennett', 1), (u'Jeffrey Forcier ', 1), (u'Paul Bissex', 1), (u'Peter Norvig', 2), (u'Stuart Russell', 1), (u'Wesley J. Chun', 1)]
+
+# On OneToMany Relationships
+
+# Forward
+# Annotate each book with the number of awards of each Book's Publisher
+>>> books = Book.objects.all().annotate(Sum('publisher__num_awards'))
+>>> sorted([(b.name, b.publisher__num_awards__sum) for b in books])
+[(u'Artificial Intelligence: A Modern Approach', 7), (u'Paradigms of Artificial Intelligence Programming: Case Studies in Common Lisp', 9), (u'Practical Django Projects', 3), (u'Python Web Development with Django', 7), (u'Sams Teach Yourself Django in 24 Hours', 1), (u'The Definitive Guide to Django: Web Development Done Right', 3)]
+
+# Backward
+# Annotate each publisher with the sum of the price of all books sold
+>>> publishers = Publisher.objects.all().annotate(Sum('book__price'))
+>>> sorted([(p.name, p.book__price__sum) for p in publishers])
+[(u'Apress', Decimal("59.69")), (u'Morgan Kaufmann', Decimal("75.00")), (u'Prentice Hall', Decimal("112.49")), (u'Sams', Decimal("23.09"))]
+
+# Calls to values() are not commutative over annotate().
+
+# Calling values on a queryset that has annotations returns the output
+# as a dictionary
+>>> Book.objects.filter(pk=1).annotate(mean_age=Avg('authors__age')).values()
+[{'rating': 4.5, 'isbn': u'159059725', 'name': u'The Definitive Guide to Django: Web Development Done Right', 'pubdate': datetime.date(2007, 12, 6), 'price': Decimal("30..."), 'id': 1, 'publisher_id': 1, 'pages': 447, 'mean_age': 34.5}]
+
+>>> Book.objects.filter(pk=1).annotate(mean_age=Avg('authors__age')).values('pk', 'isbn', 'mean_age')
+[{'pk': 1, 'isbn': u'159059725', 'mean_age': 34.5}]
+
+# Calling it with parameters reduces the output but does not remove the
+# annotation.
+>>> Book.objects.filter(pk=1).annotate(mean_age=Avg('authors__age')).values('name')
+[{'name': u'The Definitive Guide to Django: Web Development Done Right', 'mean_age': 34.5}]
+
+# An empty values() call before annotating has the same effect as an
+# empty values() call after annotating
+>>> Book.objects.filter(pk=1).values().annotate(mean_age=Avg('authors__age'))
+[{'rating': 4.5, 'isbn': u'159059725', 'name': u'The Definitive Guide to Django: Web Development Done Right', 'pubdate': datetime.date(2007, 12, 6), 'price': Decimal("30..."), 'id': 1, 'publisher_id': 1, 'pages': 447, 'mean_age': 34.5}]
+
+# Calling annotate() on a ValuesQuerySet annotates over the groups of
+# fields to be selected by the ValuesQuerySet.
+
+# Note that an extra parameter is added to each dictionary. This
+# parameter is a queryset representing the objects that have been
+# grouped to generate the annotation
+
+>>> Book.objects.all().values('rating').annotate(n_authors=Count('authors__id'), mean_age=Avg('authors__age')).order_by('rating')
+[{'rating': 3.0, 'n_authors': 1, 'mean_age': 45.0}, {'rating': 4.0, 'n_authors': 6, 'mean_age': 37.1...}, {'rating': 4.5, 'n_authors': 2, 'mean_age': 34.5}, {'rating': 5.0, 'n_authors': 1, 'mean_age': 57.0}]
+
+# If a join doesn't match any objects, an aggregate returns None
+>>> authors = Author.objects.all().annotate(Avg('friends__age')).order_by('id')
+>>> len(authors)
+9
+>>> sorted([(a.name, a.friends__age__avg) for a in authors])
+[(u'Adrian Holovaty', 32.0), (u'Brad Dayley', None), (u'Jacob Kaplan-Moss', 29.5), (u'James Bennett', 34.0), (u'Jeffrey Forcier ', 27.0), (u'Paul Bissex', 31.0), (u'Peter Norvig', 46.0), (u'Stuart Russell', 57.0), (u'Wesley J. Chun', 33.6...)]
+
+
+# The Count aggregation function allows an extra parameter: distinct.
+# This restricts the count results to unique items
+>>> Book.objects.all().aggregate(Count('rating'))
+{'rating__count': 6}
+
+>>> Book.objects.all().aggregate(Count('rating', distinct=True))
+{'rating__count': 4}
+
+# Retrieving the grouped objects
+
+# When using Count you can also omit the primary key and refer only to
+# the related field name if you want to count all the related objects
+# and not a specific column
+>>> explicit = list(Author.objects.annotate(Count('book__id')))
+>>> implicit = list(Author.objects.annotate(Count('book')))
+>>> explicit == implicit
+True
+
+# Ordering is allowed on aggregates
+>>> Book.objects.values('rating').annotate(oldest=Max('authors__age')).order_by('oldest', 'rating')
+[{'rating': 4.5, 'oldest': 35}, {'rating': 3.0, 'oldest': 45}, {'rating': 4.0, 'oldest': 57}, {'rating': 5.0, 'oldest': 57}]
+
+>>> Book.objects.values('rating').annotate(oldest=Max('authors__age')).order_by('-oldest', '-rating')
+[{'rating': 5.0, 'oldest': 57}, {'rating': 4.0, 'oldest': 57}, {'rating': 3.0, 'oldest': 45}, {'rating': 4.5, 'oldest': 35}]
+
+# It is possible to aggregate over annotated values
+>>> Book.objects.all().annotate(num_authors=Count('authors__id')).aggregate(Avg('num_authors'))
+{'num_authors__avg': 1.66...}
+
+# You can filter the results based on the aggregation alias.
+
+# Lets add a publisher to test the different possibilities for filtering
+>>> p = Publisher(name='Expensive Publisher', num_awards=0)
+>>> p.save()
+>>> Book(name='ExpensiveBook1', pages=1, isbn='111', rating=3.5, price=Decimal("1000"), publisher=p, pubdate=date(2008,12,1)).save()
+>>> Book(name='ExpensiveBook2', pages=1, isbn='222', rating=4.0, price=Decimal("1000"), publisher=p, pubdate=date(2008,12,2)).save()
+>>> Book(name='ExpensiveBook3', pages=1, isbn='333', rating=4.5, price=Decimal("35"), publisher=p, pubdate=date(2008,12,3)).save()
+
+# Publishers that have:
+
+# (i) more than one book
+>>> Publisher.objects.annotate(num_books=Count('book__id')).filter(num_books__gt=1).order_by('pk')
+[<Publisher: Apress>, <Publisher: Prentice Hall>, <Publisher: Expensive Publisher>]
+
+# (ii) a book that cost less than 40
+>>> Publisher.objects.filter(book__price__lt=Decimal("40.0")).order_by('pk')
+[<Publisher: Apress>, <Publisher: Apress>, <Publisher: Sams>, <Publisher: Prentice Hall>, <Publisher: Expensive Publisher>]
+
+# (iii) more than one book and (at least) a book that cost less than 40
+>>> Publisher.objects.annotate(num_books=Count('book__id')).filter(num_books__gt=1, book__price__lt=Decimal("40.0")).order_by('pk')
+[<Publisher: Apress>, <Publisher: Prentice Hall>, <Publisher: Expensive Publisher>]
+
+# (iv) more than one book that costs less than $40
+>>> Publisher.objects.filter(book__price__lt=Decimal("40.0")).annotate(num_books=Count('book__id')).filter(num_books__gt=1).order_by('pk')
+[<Publisher: Apress>]
+
+# Now a bit of testing on the different lookup types
+#
+
+>>> Publisher.objects.annotate(num_books=Count('book')).filter(num_books__range=[1, 3]).order_by('pk')
+[<Publisher: Apress>, <Publisher: Sams>, <Publisher: Prentice Hall>, <Publisher: Morgan Kaufmann>, <Publisher: Expensive Publisher>]
+
+>>> Publisher.objects.annotate(num_books=Count('book')).filter(num_books__range=[1, 2]).order_by('pk')
+[<Publisher: Apress>, <Publisher: Sams>, <Publisher: Prentice Hall>, <Publisher: Morgan Kaufmann>]
+
+>>> Publisher.objects.annotate(num_books=Count('book')).filter(num_books__in=[1, 3]).order_by('pk')
+[<Publisher: Sams>, <Publisher: Morgan Kaufmann>, <Publisher: Expensive Publisher>]
+
+>>> Publisher.objects.annotate(num_books=Count('book')).filter(num_books__isnull=True)
+[]
+
+>>> p.delete()
+
+# Does Author X have any friends? (or better, how many friends does author X have)
+>> Author.objects.filter(pk=1).aggregate(Count('friends__id'))
+{'friends__id__count': 2.0}
+
+# Give me a list of all Books with more than one author
+>>> Book.objects.all().annotate(num_authors=Count('authors__name')).filter(num_authors__ge=2).order_by('pk')
+[<Book: The Definitive Guide to Django: Web Development Done Right>, <Book: Artificial Intelligence: A Modern Approach>]
+
+# Give me a list of all Authors that have no friends
+>>> Author.objects.all().annotate(num_friends=Count('friends__id', distinct=True)).filter(num_friends=0).order_by('pk')
+[<Author: Brad Dayley>]
+
+# Give me a list of all publishers that have published more than one book
+>>> Publisher.objects.all().annotate(num_books=Count('book__id')).filter(num_books__gt=1).order_by('pk')
+[<Publisher: Apress>, <Publisher: Prentice Hall>]
+
+# Give me a list of all publishers that have published more than one book that costs less than 40
+>>> Publisher.objects.all().filter(book__price__lt=Decimal("40.0")).annotate(num_books=Count('book__id')).filter(num_books__gt=1)
+[<Publisher: Apress>]
+
+# Give me a list of all Books that were written by X and one other author.
+>>> Book.objects.all().annotate(num_authors=Count('authors__id')).filter(authors__name__contains='Norvig', num_authors__gt=1)
+[<Book: Artificial Intelligence: A Modern Approach>]
+
+# Give me the average rating of all Books that were written by X and one other author.
+#(Aggregate over objects discovered using membership of the m2m set)
+
+# Adding an existing author to another book to test it the right way
+>>> a = Author.objects.get(name__contains='Norvig')
+>>> b = Book.objects.get(name__contains='Done Right')
+>>> b.authors.add(a)
+>>> b.save()
+
+# This should do it
+>>> Book.objects.all().annotate(num_authors=Count('authors__id')).filter(authors__name__contains='Norvig', num_authors__gt=1).aggregate(Avg('rating'))
+{'rating__avg': 4.25}
+>>> b.authors.remove(a)
+
+# Give me a list of all Authors that have published a book with at least one other person
+# (Filters over a count generated on a related object)
+#
+# Cheating: [a for a in Author.objects.all().annotate(num_colleagues=Count('book__authors__id'), num_books=Count('book__id', distinct=True)) if a.num_colleagues - a.num_books > 0]
+# F-Syntax is required. Will be fixed after F objects are available
+
+# Tests on fields with non-default table and column names.
+>>> Clues.objects.values('EntryID__Entry').annotate(Appearances=Count('EntryID'), Distinct_Clues=Count('Clue', distinct=True))
+[]
+
+# Aggregates also work on dates, times and datetimes
+>>> Publisher.objects.annotate(earliest_book=Min('book__pubdate')).order_by('earliest_book').values()
+[{'earliest_book': datetime.date(1991, 10, 15), 'num_awards': 9, 'id': 4, 'name': u'Morgan Kaufmann'}, {'earliest_book': datetime.date(1995, 1, 15), 'num_awards': 7, 'id': 3, 'name': u'Prentice Hall'}, {'earliest_book': datetime.date(2007, 12, 6), 'num_awards': 3, 'id': 1, 'name': u'Apress'}, {'earliest_book': datetime.date(2008, 3, 3), 'num_awards': 1, 'id': 2, 'name': u'Sams'}]
+
+>>> Store.objects.aggregate(Max('friday_night_closing'), Min("original_opening"))
+{'friday_night_closing__max': datetime.time(23, 59, 59), 'original_opening__min': datetime.datetime(1945, 4, 25, 16, 24, 14)}
+
+# values_list() can also be used
+
+>>> Book.objects.filter(pk=1).annotate(mean_age=Avg('authors__age')).values_list('pk', 'isbn', 'mean_age')
+[(1, u'159059725', 34.5)]
+
+>>> Book.objects.filter(pk=1).annotate(mean_age=Avg('authors__age')).values_list('isbn')
+[(u'159059725',)]
+
+>>> Book.objects.filter(pk=1).annotate(mean_age=Avg('authors__age')).values_list('mean_age')
+[(34.5,)]
+
+>>> Book.objects.filter(pk=1).annotate(mean_age=Avg('authors__age')).values_list('mean_age', flat=True)
+[34.5]
+
+"""}

+ 0 - 0
tests/regressiontests/aggregation_regress/__init__.py


+ 229 - 0
tests/regressiontests/aggregation_regress/fixtures/initial_data.json

@@ -0,0 +1,229 @@
+[
+    {
+        "pk": 1,
+        "model": "aggregation_regress.publisher",
+        "fields": {
+            "name": "Apress",
+            "num_awards": 3
+        }
+    },
+    {
+        "pk": 2,
+        "model": "aggregation_regress.publisher",
+        "fields": {
+            "name": "Sams",
+            "num_awards": 1
+        }
+    },
+    {
+        "pk": 3,
+        "model": "aggregation_regress.publisher",
+        "fields": {
+            "name": "Prentice Hall",
+            "num_awards": 7
+        }
+    },
+    {
+        "pk": 4,
+        "model": "aggregation_regress.publisher",
+        "fields": {
+            "name": "Morgan Kaufmann",
+            "num_awards": 9
+        }
+    },
+    {
+        "pk": 1,
+        "model": "aggregation_regress.book",
+        "fields": {
+            "publisher": 1,
+            "isbn": "159059725",
+            "name": "The Definitive Guide to Django: Web Development Done Right",
+            "price": "30.00",
+            "rating": 4.5,
+            "authors": [1, 2],
+            "pages": 447,
+            "pubdate": "2007-12-6"
+        }
+    },
+    {
+        "pk": 2,
+        "model": "aggregation_regress.book",
+        "fields": {
+            "publisher": 2,
+            "isbn": "067232959",
+            "name": "Sams Teach Yourself Django in 24 Hours",
+            "price": "23.09",
+            "rating": 3.0,
+            "authors": [3],
+            "pages": 528,
+            "pubdate": "2008-3-3"
+        }
+    },
+    {
+        "pk": 3,
+        "model": "aggregation_regress.book",
+        "fields": {
+            "publisher": 1,
+            "isbn": "159059996",
+            "name": "Practical Django Projects",
+            "price": "29.69",
+            "rating": 4.0,
+            "authors": [4],
+            "pages": 300,
+            "pubdate": "2008-6-23"
+        }
+    },
+    {
+        "pk": 4,
+        "model": "aggregation_regress.book",
+        "fields": {
+            "publisher": 3,
+            "isbn": "013235613",
+            "name": "Python Web Development with Django",
+            "price": "29.69",
+            "rating": 4.0,
+            "authors": [5, 6, 7],
+            "pages": 350,
+            "pubdate": "2008-11-3"
+        }
+    },
+    {
+        "pk": 5,
+        "model": "aggregation_regress.book",
+        "fields": {
+            "publisher": 3,
+            "isbn": "013790395",
+            "name": "Artificial Intelligence: A Modern Approach",
+            "price": "82.80",
+            "rating": 4.0,
+            "authors": [8, 9],
+            "pages": 1132,
+            "pubdate": "1995-1-15"
+        }
+    },
+    {
+        "pk": 6,
+        "model": "aggregation_regress.book",
+        "fields": {
+            "publisher": 4,
+            "isbn": "155860191",
+            "name": "Paradigms of Artificial Intelligence Programming: Case Studies in Common Lisp",
+            "price": "75.00",
+            "rating": 5.0,
+            "authors": [8],
+            "pages": 946,
+            "pubdate": "1991-10-15"
+        }
+    },
+    {
+        "pk": 1,
+        "model": "aggregation_regress.store",
+        "fields": {
+            "books": [1, 2, 3, 4, 5, 6],
+            "name": "Amazon.com",
+            "original_opening": "1994-4-23 9:17:42",
+            "friday_night_closing": "23:59:59"
+        }
+    },
+    {
+        "pk": 2,
+        "model": "aggregation_regress.store",
+        "fields": {
+            "books": [1, 3, 5, 6],
+            "name": "Books.com",
+            "original_opening": "2001-3-15 11:23:37",
+            "friday_night_closing": "23:59:59"
+        }
+    },
+    {
+        "pk": 3,
+        "model": "aggregation_regress.store",
+        "fields": {
+            "books": [3, 4, 6],
+            "name": "Mamma and Pappa's Books",
+            "original_opening": "1945-4-25 16:24:14",
+            "friday_night_closing": "21:30:00"
+        }
+    },
+    {
+        "pk": 1,
+        "model": "aggregation_regress.author",
+        "fields": {
+            "age": 34,
+            "friends": [2, 4],
+            "name": "Adrian Holovaty"
+        }
+    },
+    {
+        "pk": 2,
+        "model": "aggregation_regress.author",
+        "fields": {
+            "age": 35,
+            "friends": [1, 7],
+            "name": "Jacob Kaplan-Moss"
+        }
+    },
+    {
+        "pk": 3,
+        "model": "aggregation_regress.author",
+        "fields": {
+            "age": 45,
+            "friends": [],
+            "name": "Brad Dayley"
+        }
+    },
+    {
+        "pk": 4,
+        "model": "aggregation_regress.author",
+        "fields": {
+            "age": 29,
+            "friends": [1],
+            "name": "James Bennett"
+        }
+    },
+    {
+        "pk": 5,
+        "model": "aggregation_regress.author",
+        "fields": {
+            "age": 37,
+            "friends": [6, 7],
+            "name": "Jeffrey Forcier "
+        }
+    },
+    {
+        "pk": 6,
+        "model": "aggregation_regress.author",
+        "fields": {
+            "age": 29,
+            "friends": [5, 7],
+            "name": "Paul Bissex"
+        }
+    },
+    {
+        "pk": 7,
+        "model": "aggregation_regress.author",
+        "fields": {
+            "age": 25,
+            "friends": [2, 5, 6],
+            "name": "Wesley J. Chun"
+        }
+    },
+    {
+        "pk": 8,
+        "model": "aggregation_regress.author",
+        "fields": {
+            "age": 57,
+            "friends": [9],
+            "name": "Peter Norvig"
+        }
+    },
+    {
+        "pk": 9,
+        "model": "aggregation_regress.author",
+        "fields": {
+            "age": 46,
+            "friends": [8],
+            "name": "Stuart Russell"
+        }
+    }
+]

+ 199 - 0
tests/regressiontests/aggregation_regress/models.py

@@ -0,0 +1,199 @@
+# coding: utf-8
+from django.db import models
+from django.conf import settings
+
+try:
+    sorted
+except NameError:
+    from django.utils.itercompat import sorted      # For Python 2.3
+
+class Author(models.Model):
+   name = models.CharField(max_length=100)
+   age = models.IntegerField()
+   friends = models.ManyToManyField('self', blank=True)
+
+   def __unicode__(self):
+      return self.name
+
+class Publisher(models.Model):
+   name = models.CharField(max_length=300)
+   num_awards = models.IntegerField()
+
+   def __unicode__(self):
+      return self.name
+
+class Book(models.Model):
+   isbn = models.CharField(max_length=9)
+   name = models.CharField(max_length=300)
+   pages = models.IntegerField()
+   rating = models.FloatField()
+   price = models.DecimalField(decimal_places=2, max_digits=6)
+   authors = models.ManyToManyField(Author)
+   publisher = models.ForeignKey(Publisher)
+   pubdate = models.DateField()
+
+   class Meta:
+       ordering = ('name',)
+
+   def __unicode__(self):
+      return self.name
+
+class Store(models.Model):
+   name = models.CharField(max_length=300)
+   books = models.ManyToManyField(Book)
+   original_opening = models.DateTimeField()
+   friday_night_closing = models.TimeField()
+
+   def __unicode__(self):
+      return self.name
+
+# Extra does not play well with values. Modify the tests if/when this is fixed.
+__test__ = {'API_TESTS': """
+>>> from django.core import management
+>>> from django.db.models import get_app
+
+# Reset the database representation of this app.
+# This will return the database to a clean initial state.
+>>> management.call_command('flush', verbosity=0, interactive=False)
+
+>>> from django.db.models import Avg, Sum, Count, Max, Min, StdDev, Variance
+
+# Ordering requests are ignored
+>>> Author.objects.all().order_by('name').aggregate(Avg('age'))
+{'age__avg': 37.4...}
+
+# Implicit ordering is also ignored
+>>> Book.objects.all().aggregate(Sum('pages'))
+{'pages__sum': 3703}
+
+# Baseline results
+>>> Book.objects.all().aggregate(Sum('pages'), Avg('pages'))
+{'pages__sum': 3703, 'pages__avg': 617.1...}
+
+# Empty values query doesn't affect grouping or results
+>>> Book.objects.all().values().aggregate(Sum('pages'), Avg('pages'))
+{'pages__sum': 3703, 'pages__avg': 617.1...}
+
+# Aggregate overrides extra selected column
+>>> Book.objects.all().extra(select={'price_per_page' : 'price / pages'}).aggregate(Sum('pages'))
+{'pages__sum': 3703}
+
+# Annotations get combined with extra select clauses
+>>> sorted(Book.objects.all().annotate(mean_auth_age=Avg('authors__age')).extra(select={'manufacture_cost' : 'price * .5'}).get(pk=2).__dict__.items())
+[('id', 2), ('isbn', u'067232959'), ('manufacture_cost', ...11.545...), ('mean_auth_age', 45.0), ('name', u'Sams Teach Yourself Django in 24 Hours'), ('pages', 528), ('price', Decimal("23.09")), ('pubdate', datetime.date(2008, 3, 3)), ('publisher_id', 2), ('rating', 3.0)]
+
+# Order of the annotate/extra in the query doesn't matter
+>>> sorted(Book.objects.all().extra(select={'manufacture_cost' : 'price * .5'}).annotate(mean_auth_age=Avg('authors__age')).get(pk=2).__dict__.items())
+[('id', 2), ('isbn', u'067232959'), ('manufacture_cost', ...11.545...), ('mean_auth_age', 45.0), ('name', u'Sams Teach Yourself Django in 24 Hours'), ('pages', 528), ('price', Decimal("23.09")), ('pubdate', datetime.date(2008, 3, 3)), ('publisher_id', 2), ('rating', 3.0)]
+
+# Values queries can be combined with annotate and extra
+>>> sorted(Book.objects.all().annotate(mean_auth_age=Avg('authors__age')).extra(select={'manufacture_cost' : 'price * .5'}).values().get(pk=2).items())
+[('id', 2), ('isbn', u'067232959'), ('manufacture_cost', ...11.545...), ('mean_auth_age', 45.0), ('name', u'Sams Teach Yourself Django in 24 Hours'), ('pages', 528), ('price', Decimal("23.09")), ('pubdate', datetime.date(2008, 3, 3)), ('publisher_id', 2), ('rating', 3.0)]
+
+# The order of the values, annotate and extra clauses doesn't matter
+>>> sorted(Book.objects.all().values().annotate(mean_auth_age=Avg('authors__age')).extra(select={'manufacture_cost' : 'price * .5'}).get(pk=2).items())
+[('id', 2), ('isbn', u'067232959'), ('manufacture_cost', ...11.545...), ('mean_auth_age', 45.0), ('name', u'Sams Teach Yourself Django in 24 Hours'), ('pages', 528), ('price', Decimal("23.09")), ('pubdate', datetime.date(2008, 3, 3)), ('publisher_id', 2), ('rating', 3.0)]
+
+# A values query that selects specific columns reduces the output
+>>> sorted(Book.objects.all().annotate(mean_auth_age=Avg('authors__age')).extra(select={'price_per_page' : 'price / pages'}).values('name').get(pk=1).items())
+[('mean_auth_age', 34.5), ('name', u'The Definitive Guide to Django: Web Development Done Right')]
+
+# The annotations are added to values output if values() precedes annotate()
+>>> sorted(Book.objects.all().values('name').annotate(mean_auth_age=Avg('authors__age')).extra(select={'price_per_page' : 'price / pages'}).get(pk=1).items())
+[('mean_auth_age', 34.5), ('name', u'The Definitive Guide to Django: Web Development Done Right')]
+
+# Check that all of the objects are getting counted (allow_nulls) and that values respects the amount of objects
+>>> len(Author.objects.all().annotate(Avg('friends__age')).values())
+9
+
+# Check that consecutive calls to annotate accumulate in the query
+>>> Book.objects.values('price').annotate(oldest=Max('authors__age')).order_by('oldest', 'price').annotate(Max('publisher__num_awards'))
+[{'price': Decimal("30..."), 'oldest': 35, 'publisher__num_awards__max': 3}, {'price': Decimal("29.69"), 'oldest': 37, 'publisher__num_awards__max': 7}, {'price': Decimal("23.09"), 'oldest': 45, 'publisher__num_awards__max': 1}, {'price': Decimal("75..."), 'oldest': 57, 'publisher__num_awards__max': 9}, {'price': Decimal("82.8..."), 'oldest': 57, 'publisher__num_awards__max': 7}]
+
+# Aggregates can be composed over annotations.
+# The return type is derived from the composed aggregate
+>>> Book.objects.all().annotate(num_authors=Count('authors__id')).aggregate(Max('pages'), Max('price'), Sum('num_authors'), Avg('num_authors'))
+{'num_authors__sum': 10, 'num_authors__avg': 1.66..., 'pages__max': 1132, 'price__max': Decimal("82.80")}
+
+# Bad field requests in aggregates are caught and reported
+>>> Book.objects.all().aggregate(num_authors=Count('foo'))
+Traceback (most recent call last):
+...
+FieldError: Cannot resolve keyword 'foo' into field. Choices are: authors, id, isbn, name, pages, price, pubdate, publisher, rating, store
+
+>>> Book.objects.all().annotate(num_authors=Count('foo'))
+Traceback (most recent call last):
+...
+FieldError: Cannot resolve keyword 'foo' into field. Choices are: authors, id, isbn, name, pages, price, pubdate, publisher, rating, store
+
+>>> Book.objects.all().annotate(num_authors=Count('authors__id')).aggregate(Max('foo'))
+Traceback (most recent call last):
+...
+FieldError: Cannot resolve keyword 'foo' into field. Choices are: authors, id, isbn, name, pages, price, pubdate, publisher, rating, store, num_authors
+
+# Old-style count aggregations can be mixed with new-style
+>>> Book.objects.annotate(num_authors=Count('authors')).count()
+6
+
+# Non-ordinal, non-computed Aggregates over annotations correctly inherit
+# the annotation's internal type if the annotation is ordinal or computed
+>>> Book.objects.annotate(num_authors=Count('authors')).aggregate(Max('num_authors'))
+{'num_authors__max': 3}
+
+>>> Publisher.objects.annotate(avg_price=Avg('book__price')).aggregate(Max('avg_price'))
+{'avg_price__max': 75.0...}
+
+# Aliases are quoted to protect aliases that might be reserved names
+>>> Book.objects.aggregate(number=Max('pages'), select=Max('pages'))
+{'number': 1132, 'select': 1132}
+
+
+"""
+}
+
+if settings.DATABASE_ENGINE != 'sqlite3':
+    __test__['API_TESTS'] += """
+# Stddev and Variance are not guaranteed to be available for SQLite.
+
+>>> Book.objects.aggregate(StdDev('pages'))
+{'pages__stddev': 311.46...}
+
+>>> Book.objects.aggregate(StdDev('rating'))
+{'rating__stddev': 0.60...}
+
+>>> Book.objects.aggregate(StdDev('price'))
+{'price__stddev': 24.16...}
+
+
+>>> Book.objects.aggregate(StdDev('pages', sample=True))
+{'pages__stddev': 341.19...}
+
+>>> Book.objects.aggregate(StdDev('rating', sample=True))
+{'rating__stddev': 0.66...}
+
+>>> Book.objects.aggregate(StdDev('price', sample=True))
+{'price__stddev': 26.46...}
+
+
+>>> Book.objects.aggregate(Variance('pages'))
+{'pages__variance': 97010.80...}
+
+>>> Book.objects.aggregate(Variance('rating'))
+{'rating__variance': 0.36...}
+
+>>> Book.objects.aggregate(Variance('price'))
+{'price__variance': 583.77...}
+
+
+>>> Book.objects.aggregate(Variance('pages', sample=True))
+{'pages__variance': 116412.96...}
+
+>>> Book.objects.aggregate(Variance('rating', sample=True))
+{'rating__variance': 0.44...}
+
+>>> Book.objects.aggregate(Variance('price', sample=True))
+{'price__variance': 700.53...}
+
+
+"""
+