Browse Source

Fixed #16649 -- Refactored save_base logic

Model.save() will use UPDATE - if not updated - INSERT instead of
SELECT - if found UPDATE else INSERT. This should save a query when
updating, but will cost a little when inserting model with PK set.

Also fixed #17341 -- made sure .save() commits transactions only after
the whole model has been saved. This wasn't the case in model
inheritance situations.

The save_base implementation was refactored into multiple methods.
A typical chain for inherited save is:
save_base()
    _save_parents(self)
        for each parent:
            _save_parents(parent)
            _save_table(parent)
    _save_table(self)
Anssi Kääriäinen 12 years ago
parent
commit
6b4834952d

+ 115 - 101
django/db/models/base.py

@@ -545,125 +545,139 @@ class Model(six.with_metaclass(ModelBase)):
                        force_update=force_update, update_fields=update_fields)
     save.alters_data = True
 
-    def save_base(self, raw=False, cls=None, origin=None, force_insert=False,
+    def save_base(self, raw=False, force_insert=False,
                   force_update=False, using=None, update_fields=None):
         """
-        Does the heavy-lifting involved in saving. Subclasses shouldn't need to
-        override this method. It's separate from save() in order to hide the
-        need for overrides of save() to pass around internal-only parameters
-        ('raw', 'cls', and 'origin').
+        Handles the parts of saving which should be done only once per save,
+        yet need to be done in raw saves, too. This includes some sanity
+        checks and signal sending.
+
+        The 'raw' argument is telling save_base not to save any parent
+        models and not to do any changes to the values before save. This
+        is used by fixture loading.
         """
         using = using or router.db_for_write(self.__class__, instance=self)
         assert not (force_insert and (force_update or update_fields))
         assert update_fields is None or len(update_fields) > 0
-        if cls is None:
-            cls = self.__class__
-            meta = cls._meta
-            if not meta.proxy:
-                origin = cls
-        else:
-            meta = cls._meta
-
-        if origin and not meta.auto_created:
+        cls = origin = self.__class__
+        # Skip proxies, but keep the origin as the proxy model.
+        if cls._meta.proxy:
+            cls = cls._meta.concrete_model
+        meta = cls._meta
+        if not meta.auto_created:
             signals.pre_save.send(sender=origin, instance=self, raw=raw, using=using,
                                   update_fields=update_fields)
-
-        # If we are in a raw save, save the object exactly as presented.
-        # That means that we don't try to be smart about saving attributes
-        # that might have come from the parent class - we just save the
-        # attributes we have been given to the class we have been given.
-        # We also go through this process to defer the save of proxy objects
-        # to their actual underlying model.
-        if not raw or meta.proxy:
-            if meta.proxy:
-                org = cls
-            else:
-                org = None
-            for parent, field in meta.parents.items():
-                # At this point, parent's primary key field may be unknown
-                # (for example, from administration form which doesn't fill
-                # this field). If so, fill it.
-                if field and getattr(self, parent._meta.pk.attname) is None and getattr(self, field.attname) is not None:
-                    setattr(self, parent._meta.pk.attname, getattr(self, field.attname))
-
-                self.save_base(cls=parent, origin=org, using=using,
-                               update_fields=update_fields)
-
-                if field:
-                    setattr(self, field.attname, self._get_pk_val(parent._meta))
-                    # Since we didn't have an instance of the parent handy, we
-                    # set attname directly, bypassing the descriptor.
-                    # Invalidate the related object cache, in case it's been
-                    # accidentally populated. A fresh instance will be
-                    # re-built from the database if necessary.
-                    cache_name = field.get_cache_name()
-                    if hasattr(self, cache_name):
-                        delattr(self, cache_name)
-
-            if meta.proxy:
-                return
-
-        if not meta.proxy:
-            non_pks = [f for f in meta.local_fields if not f.primary_key]
-
-            if update_fields:
-                non_pks = [f for f in non_pks if f.name in update_fields or f.attname in update_fields]
-
-            with transaction.commit_on_success_unless_managed(using=using):
-                # First, try an UPDATE. If that doesn't update anything, do an INSERT.
-                pk_val = self._get_pk_val(meta)
-                pk_set = pk_val is not None
-                record_exists = True
-                manager = cls._base_manager
-                if pk_set:
-                    # Determine if we should do an update (pk already exists, forced update,
-                    # no force_insert)
-                    if ((force_update or update_fields) or (not force_insert and
-                            manager.using(using).filter(pk=pk_val).exists())):
-                        if force_update or non_pks:
-                            values = [(f, None, (raw and getattr(self, f.attname) or f.pre_save(self, False))) for f in non_pks]
-                            if values:
-                                rows = manager.using(using).filter(pk=pk_val)._update(values)
-                                if force_update and not rows:
-                                    raise DatabaseError("Forced update did not affect any rows.")
-                                if update_fields and not rows:
-                                    raise DatabaseError("Save with update_fields did not affect any rows.")
-                    else:
-                        record_exists = False
-                if not pk_set or not record_exists:
-                    if meta.order_with_respect_to:
-                        # If this is a model with an order_with_respect_to
-                        # autopopulate the _order field
-                        field = meta.order_with_respect_to
-                        order_value = manager.using(using).filter(**{field.name: getattr(self, field.attname)}).count()
-                        self._order = order_value
-
-                    fields = meta.local_fields
-                    if not pk_set:
-                        if force_update or update_fields:
-                            raise ValueError("Cannot force an update in save() with no primary key.")
-                        fields = [f for f in fields if not isinstance(f, AutoField)]
-
-                    record_exists = False
-
-                    update_pk = bool(meta.has_auto_field and not pk_set)
-                    result = manager._insert([self], fields=fields, return_id=update_pk, using=using, raw=raw)
-
-                    if update_pk:
-                        setattr(self, meta.pk.attname, result)
-
+        with transaction.commit_on_success_unless_managed(using=using, savepoint=False):
+            if not raw:
+                self._save_parents(cls, using, update_fields)
+            updated = self._save_table(raw, cls, force_insert, force_update, using, update_fields)
         # Store the database on which the object was saved
         self._state.db = using
         # Once saved, this is no longer a to-be-added instance.
         self._state.adding = False
 
         # Signal that the save is complete
-        if origin and not meta.auto_created:
-            signals.post_save.send(sender=origin, instance=self, created=(not record_exists),
+        if not meta.auto_created:
+            signals.post_save.send(sender=origin, instance=self, created=(not updated),
                                    update_fields=update_fields, raw=raw, using=using)
 
     save_base.alters_data = True
 
+    def _save_parents(self, cls, using, update_fields):
+        """
+        Saves all the parents of cls using values from self.
+        """
+        meta = cls._meta
+        for parent, field in meta.parents.items():
+            # Make sure the link fields are synced between parent and self.
+            if (field and getattr(self, parent._meta.pk.attname) is None
+                    and getattr(self, field.attname) is not None):
+                setattr(self, parent._meta.pk.attname, getattr(self, field.attname))
+            self._save_parents(cls=parent, using=using, update_fields=update_fields)
+            self._save_table(cls=parent, using=using, update_fields=update_fields)
+            # Set the parent's PK value to self.
+            if field:
+                setattr(self, field.attname, self._get_pk_val(parent._meta))
+                # Since we didn't have an instance of the parent handy set
+                # attname directly, bypassing the descriptor. Invalidate
+                # the related object cache, in case it's been accidentally
+                # populated. A fresh instance will be re-built from the
+                # database if necessary.
+                cache_name = field.get_cache_name()
+                if hasattr(self, cache_name):
+                    delattr(self, cache_name)
+
+    def _save_table(self, raw=False, cls=None, force_insert=False,
+                    force_update=False, using=None, update_fields=None):
+        """
+        Does the heavy-lifting involved in saving. Updates or inserts the data
+        for a single table.
+        """
+        meta = cls._meta
+        non_pks = [f for f in meta.local_fields if not f.primary_key]
+
+        if update_fields:
+            non_pks = [f for f in non_pks
+                       if f.name in update_fields or f.attname in update_fields]
+
+        pk_val = self._get_pk_val(meta)
+        pk_set = pk_val is not None
+        if not pk_set and (force_update or update_fields):
+            raise ValueError("Cannot force an update in save() with no primary key.")
+        updated = False
+        # If possible, try an UPDATE. If that doesn't update anything, do an INSERT.
+        if pk_set and not force_insert:
+            base_qs = cls._base_manager.using(using)
+            values = [(f, None, (raw and getattr(self, f.attname) or f.pre_save(self, False)))
+                      for f in non_pks]
+            if not values:
+                # We can end up here when saving a model in inheritance chain where
+                # update_fields doesn't target any field in current model. In that
+                # case we just say the update succeeded. Another case ending up here
+                # is a model with just PK - in that case check that the PK still
+                # exists.
+                updated = update_fields is not None or base_qs.filter(pk=pk_val).exists()
+            else:
+                updated = self._do_update(base_qs, using, pk_val, values)
+            if force_update and not updated:
+                raise DatabaseError("Forced update did not affect any rows.")
+            if update_fields and not updated:
+                raise DatabaseError("Save with update_fields did not affect any rows.")
+        if not updated:
+            if meta.order_with_respect_to:
+                # If this is a model with an order_with_respect_to
+                # autopopulate the _order field
+                field = meta.order_with_respect_to
+                order_value = cls._base_manager.using(using).filter(
+                    **{field.name: getattr(self, field.attname)}).count()
+                self._order = order_value
+
+            fields = meta.local_fields
+            if not pk_set:
+                fields = [f for f in fields if not isinstance(f, AutoField)]
+
+            update_pk = bool(meta.has_auto_field and not pk_set)
+            result = self._do_insert(cls._base_manager, using, fields, update_pk, raw)
+            if update_pk:
+                setattr(self, meta.pk.attname, result)
+        return updated
+
+    def _do_update(self, base_qs, using, pk_val, values):
+        """
+        This method will try to update the model. If the model was updated (in
+        the sense that an update query was done and a matching row was found
+        from the DB) the method will return True.
+        """
+        return base_qs.filter(pk=pk_val)._update(values) > 0
+
+    def _do_insert(self, manager, using, fields, update_pk, raw):
+        """
+        Do an INSERT. If update_pk is defined then this method should return
+        the new pk for the model.
+        """
+        return manager._insert([self], fields=fields, return_id=update_pk,
+                               using=using, raw=raw)
+
     def delete(self, using=None):
         using = using or router.db_for_write(self.__class__, instance=self)
         assert self._get_pk_val() is not None, "%s object can't be deleted because its %s attribute is set to None." % (self._meta.object_name, self._meta.pk.attname)

+ 7 - 6
docs/ref/models/instances.txt

@@ -292,12 +292,13 @@ follows this algorithm:
 
 * If the object's primary key attribute is set to a value that evaluates to
   ``True`` (i.e., a value other than ``None`` or the empty string), Django
-  executes a ``SELECT`` query to determine whether a record with the given
-  primary key already exists.
-* If the record with the given primary key does already exist, Django
-  executes an ``UPDATE`` query.
-* If the object's primary key attribute is *not* set, or if it's set but a
-  record doesn't exist, Django executes an ``INSERT``.
+  executes an ``UPDATE``.
+* If the object's primary key attribute is *not* set or if the ``UPDATE``
+  didn't update anything, Django executes an ``INSERT``.
+
+.. versionchanged:: 1.6
+  Previously Django used ``SELECT`` - if not found ``INSERT`` else ``UPDATE``
+  algorithm. The old algorithm resulted in one more query in ``UPDATE`` case.
 
 The one gotcha here is that you should be careful not to specify a primary-key
 value explicitly when saving new objects, if you cannot guarantee the

+ 4 - 0
docs/releases/1.6.txt

@@ -150,6 +150,10 @@ Minor features
 * Generic :class:`~django.contrib.gis.db.models.GeometryField` is now editable
   with the OpenLayers widget in the admin.
 
+* The :meth:`Model.save() <django.db.models.Model.save()>` will do
+  ``UPDATE`` - if not updated - ``INSERT`` instead of ``SELECT`` - if not
+  found ``INSERT`` else ``UPDATE`` in case the model's primary key is set.
+
 Backwards incompatible changes in 1.6
 =====================================
 

+ 29 - 2
tests/basic/tests.py

@@ -1,11 +1,13 @@
 from __future__ import absolute_import, unicode_literals
 
 from datetime import datetime
+import threading
 
 from django.core.exceptions import ObjectDoesNotExist, MultipleObjectsReturned, FieldError
+from django.db import connections, DEFAULT_DB_ALIAS
 from django.db.models.fields import Field, FieldDoesNotExist
 from django.db.models.query import QuerySet, EmptyQuerySet, ValuesListQuerySet
-from django.test import TestCase, skipIfDBFeature, skipUnlessDBFeature
+from django.test import TestCase, TransactionTestCase, skipIfDBFeature, skipUnlessDBFeature
 from django.utils import six
 from django.utils.translation import ugettext_lazy
 
@@ -690,4 +692,29 @@ class ModelTest(TestCase):
     def test_invalid_qs_list(self):
         qs = Article.objects.order_by('invalid_column')
         self.assertRaises(FieldError, list, qs)
-        self.assertRaises(FieldError, list, qs)
+        self.assertRaises(FieldError, list, qs)
+
+class ConcurrentSaveTests(TransactionTestCase):
+    @skipUnlessDBFeature('test_db_allows_multiple_connections')
+    def test_concurrent_delete_with_save(self):
+        """
+        Test fetching, deleting and finally saving an object - we should get
+        an insert in this case.
+        """
+        a = Article.objects.create(headline='foo', pub_date=datetime.now())
+        exceptions = []
+        def deleter():
+            try:
+                # Do not delete a directly - doing so alters its state.
+                Article.objects.filter(pk=a.pk).delete()
+                connections[DEFAULT_DB_ALIAS].commit_unless_managed()
+            except Exception as e:
+                exceptions.append(e)
+            finally:
+                connections[DEFAULT_DB_ALIAS].close()
+        self.assertEqual(len(exceptions), 0)
+        t = threading.Thread(target=deleter)
+        t.start()
+        t.join()
+        a.save()
+        self.assertEqual(Article.objects.get(pk=a.pk).headline, 'foo')

+ 1 - 1
tests/model_inheritance/tests.py

@@ -294,7 +294,7 @@ class ModelInheritanceTests(TestCase):
             rating=4,
             chef=c
         )
-        with self.assertNumQueries(6):
+        with self.assertNumQueries(3):
             ir.save()
 
     def test_update_parent_filtering(self):

+ 2 - 0
tests/transactions_regress/models.py

@@ -4,6 +4,8 @@ from django.db import models
 class Mod(models.Model):
     fld = models.IntegerField()
 
+class SubMod(Mod):
+    cnt = models.IntegerField(unique=True)
 
 class M2mA(models.Model):
     others = models.ManyToManyField('M2mB')

+ 21 - 3
tests/transactions_regress/tests.py

@@ -1,6 +1,7 @@
 from __future__ import absolute_import
 
-from django.db import connection, connections, transaction, DEFAULT_DB_ALIAS, DatabaseError
+from django.db import (connection, connections, transaction, DEFAULT_DB_ALIAS, DatabaseError,
+                       IntegrityError)
 from django.db.transaction import commit_on_success, commit_manually, TransactionManagementError
 from django.test import TransactionTestCase, skipUnlessDBFeature
 from django.test.utils import override_settings
@@ -8,8 +9,25 @@ from django.utils.unittest import skipIf, skipUnless
 
 from transactions.tests import IgnorePendingDeprecationWarningsMixin
 
-from .models import Mod, M2mA, M2mB
-
+from .models import Mod, M2mA, M2mB, SubMod
+
+class ModelInheritanceTests(TransactionTestCase):
+    def test_save(self):
+        # First, create a SubMod, then try to save another with conflicting
+        # cnt field. The problem was that transactions were committed after
+        # every parent save when not in managed transaction. As the cnt
+        # conflict is in the second model, we can check if the first save
+        # was committed or not.
+        SubMod(fld=1, cnt=1).save()
+        # We should have committed the transaction for the above - assert this.
+        connection.rollback()
+        self.assertEqual(SubMod.objects.count(), 1)
+        try:
+            SubMod(fld=2, cnt=1).save()
+        except IntegrityError:
+            connection.rollback()
+        self.assertEqual(SubMod.objects.count(), 1)
+        self.assertEqual(Mod.objects.count(), 1)
 
 class TestTransactionClosing(IgnorePendingDeprecationWarningsMixin, TransactionTestCase):
     """