浏览代码

Do simple pack checking during receive-pack.

Call obj.check() for each object in a pack. This requires passing the
Pack object back from the various commit callbacks.

In the future, we might consider not even moving the pack in until after
has been checked. At the moment, however, the only way to complete a
thin pack is to move it in, so we must do that first. This results in a
garbage pack if the check fails, which would have to be cleaned up by
an eventual GC. However, since only corrupt objects (not packs) can be
written to disk, with the SHA of their corrupt contents, the chances of
actually hitting a corrupt object in practice are very small.

Change-Id: Ib6c6ae2846f77f6a6cd4849b9608c29886258673
Dave Borowitz 15 年之前
父节点
当前提交
531f3f4c10
共有 3 个文件被更改,包括 34 次插入27 次删除
  1. 14 5
      dulwich/object_store.py
  2. 3 1
      dulwich/pack.py
  3. 17 21
      dulwich/server.py

+ 14 - 5
dulwich/object_store.py

@@ -315,13 +315,14 @@ class PackBasedObjectStore(BaseObjectStore):
         """Add a set of objects to this object store.
         """Add a set of objects to this object store.
 
 
         :param objects: Iterable over objects, should support __len__.
         :param objects: Iterable over objects, should support __len__.
+        :return: Pack object of the objects written.
         """
         """
         if len(objects) == 0:
         if len(objects) == 0:
             # Don't bother writing an empty pack file
             # Don't bother writing an empty pack file
             return
             return
         f, commit = self.add_pack()
         f, commit = self.add_pack()
         write_pack_data(f, objects, len(objects))
         write_pack_data(f, objects, len(objects))
-        commit()
+        return commit()
 
 
 
 
 class DiskObjectStore(PackBasedObjectStore):
 class DiskObjectStore(PackBasedObjectStore):
@@ -407,7 +408,9 @@ class DiskObjectStore(PackBasedObjectStore):
         newbasename = os.path.join(self.pack_dir, "pack-%s" % pack_sha)
         newbasename = os.path.join(self.pack_dir, "pack-%s" % pack_sha)
         os.rename(temppath+".pack", newbasename+".pack")
         os.rename(temppath+".pack", newbasename+".pack")
         os.rename(temppath+".idx", newbasename+".idx")
         os.rename(temppath+".idx", newbasename+".idx")
-        self._add_known_pack(Pack(newbasename))
+        final_pack = Pack(newbasename)
+        self._add_known_pack(final_pack)
+        return final_pack
 
 
     def move_in_pack(self, path):
     def move_in_pack(self, path):
         """Move a specific file containing a pack into the pack directory.
         """Move a specific file containing a pack into the pack directory.
@@ -424,7 +427,9 @@ class DiskObjectStore(PackBasedObjectStore):
         write_pack_index_v2(basename+".idx", entries, p.get_stored_checksum())
         write_pack_index_v2(basename+".idx", entries, p.get_stored_checksum())
         p.close()
         p.close()
         os.rename(path, basename + ".pack")
         os.rename(path, basename + ".pack")
-        self._add_known_pack(Pack(basename))
+        final_pack = Pack(basename)
+        self._add_known_pack(final_pack)
+        return final_pack
 
 
     def add_thin_pack(self):
     def add_thin_pack(self):
         """Add a new thin pack to this object store.
         """Add a new thin pack to this object store.
@@ -438,7 +443,9 @@ class DiskObjectStore(PackBasedObjectStore):
             os.fsync(fd)
             os.fsync(fd)
             f.close()
             f.close()
             if os.path.getsize(path) > 0:
             if os.path.getsize(path) > 0:
-                self.move_in_thin_pack(path)
+                return self.move_in_thin_pack(path)
+            else:
+                return None
         return f, commit
         return f, commit
 
 
     def add_pack(self):
     def add_pack(self):
@@ -453,7 +460,9 @@ class DiskObjectStore(PackBasedObjectStore):
             os.fsync(fd)
             os.fsync(fd)
             f.close()
             f.close()
             if os.path.getsize(path) > 0:
             if os.path.getsize(path) > 0:
-                self.move_in_pack(path)
+                return self.move_in_pack(path)
+            else:
+                return None
         return f, commit
         return f, commit
 
 
     def add_object(self, obj):
     def add_object(self, obj):

+ 3 - 1
dulwich/pack.py

@@ -316,7 +316,6 @@ class PackIndex(object):
 
 
     def check(self):
     def check(self):
         """Check that the stored checksum matches the actual checksum."""
         """Check that the stored checksum matches the actual checksum."""
-        # TODO: Check pack contents, too
         actual = self.calculate_checksum()
         actual = self.calculate_checksum()
         stored = self.get_stored_checksum()
         stored = self.get_stored_checksum()
         if actual != stored:
         if actual != stored:
@@ -1233,6 +1232,9 @@ class Pack(object):
         """
         """
         self.index.check()
         self.index.check()
         self.data.check()
         self.data.check()
+        for obj in self.iterobjects():
+            obj.check()
+        # TODO: object connectivity checks
 
 
     def get_stored_checksum(self):
     def get_stored_checksum(self):
         return self.data.get_stored_checksum()
         return self.data.get_stored_checksum()

+ 17 - 21
dulwich/server.py

@@ -36,6 +36,7 @@ from dulwich.errors import (
     ApplyDeltaError,
     ApplyDeltaError,
     ChecksumMismatch,
     ChecksumMismatch,
     GitProtocolError,
     GitProtocolError,
+    ObjectFormatException,
     )
     )
 from dulwich.misc import (
 from dulwich.misc import (
     make_sha,
     make_sha,
@@ -643,48 +644,43 @@ class ReceivePackHandler(Handler):
     def _apply_pack(self, refs):
     def _apply_pack(self, refs):
         f, commit = self.repo.object_store.add_thin_pack()
         f, commit = self.repo.object_store.add_thin_pack()
         all_exceptions = (IOError, OSError, ChecksumMismatch, ApplyDeltaError,
         all_exceptions = (IOError, OSError, ChecksumMismatch, ApplyDeltaError,
-                          AssertionError, socket.error, zlib.error)
+                          AssertionError, socket.error, zlib.error,
+                          ObjectFormatException)
         status = []
         status = []
         unpack_error = None
         unpack_error = None
         # TODO: more informative error messages than just the exception string
         # TODO: more informative error messages than just the exception string
         try:
         try:
             PackStreamVerifier(self.proto, f).verify()
             PackStreamVerifier(self.proto, f).verify()
-        except all_exceptions, e:
-            unpack_error = str(e).replace('\n', '')
-        try:
-            commit()
-        except all_exceptions, e:
-            if not unpack_error:
-                unpack_error = str(e).replace('\n', '')
-
-        if unpack_error:
-            status.append(('unpack', unpack_error))
-        else:
+            p = commit()
+            if not p:
+                raise IOError('Failed to write pack')
+            p.check()
             status.append(('unpack', 'ok'))
             status.append(('unpack', 'ok'))
+        except all_exceptions, e:
+            status.append(('unpack', str(e).replace('\n', '')))
+            # The pack may still have been moved in, but it may contain broken
+            # objects. We trust a later GC to clean it up.
 
 
         for oldsha, sha, ref in refs:
         for oldsha, sha, ref in refs:
-            ref_error = None
+            ref_status = 'ok'
             try:
             try:
                 if sha == ZERO_SHA:
                 if sha == ZERO_SHA:
-                    if not self.has_capability('delete-refs'):
+                    if not delete_refs:
                         raise GitProtocolError(
                         raise GitProtocolError(
                           'Attempted to delete refs without delete-refs '
                           'Attempted to delete refs without delete-refs '
                           'capability.')
                           'capability.')
                     try:
                     try:
                         del self.repo.refs[ref]
                         del self.repo.refs[ref]
                     except all_exceptions:
                     except all_exceptions:
-                        ref_error = 'failed to delete'
+                        ref_status = 'failed to delete'
                 else:
                 else:
                     try:
                     try:
                         self.repo.refs[ref] = sha
                         self.repo.refs[ref] = sha
                     except all_exceptions:
                     except all_exceptions:
-                        ref_error = 'failed to write'
+                        ref_status = 'failed to write'
             except KeyError, e:
             except KeyError, e:
-                ref_error = 'bad ref'
-            if ref_error:
-                status.append((ref, ref_error))
-            else:
-                status.append((ref, 'ok'))
+                ref_status = 'bad ref'
+            status.append((ref, ref_status))
 
 
         return status
         return status