瀏覽代碼

Do simple pack checking during receive-pack.

Call obj.check() for each object in a pack. This requires passing the
Pack object back from the various commit callbacks.

In the future, we might consider not even moving the pack in until after
has been checked. At the moment, however, the only way to complete a
thin pack is to move it in, so we must do that first. This results in a
garbage pack if the check fails, which would have to be cleaned up by
an eventual GC. However, since only corrupt objects (not packs) can be
written to disk, with the SHA of their corrupt contents, the chances of
actually hitting a corrupt object in practice are very small.

Change-Id: Ib6c6ae2846f77f6a6cd4849b9608c29886258673
Dave Borowitz 15 年之前
父節點
當前提交
531f3f4c10
共有 3 個文件被更改,包括 34 次插入27 次删除
  1. 14 5
      dulwich/object_store.py
  2. 3 1
      dulwich/pack.py
  3. 17 21
      dulwich/server.py

+ 14 - 5
dulwich/object_store.py

@@ -315,13 +315,14 @@ class PackBasedObjectStore(BaseObjectStore):
         """Add a set of objects to this object store.
 
         :param objects: Iterable over objects, should support __len__.
+        :return: Pack object of the objects written.
         """
         if len(objects) == 0:
             # Don't bother writing an empty pack file
             return
         f, commit = self.add_pack()
         write_pack_data(f, objects, len(objects))
-        commit()
+        return commit()
 
 
 class DiskObjectStore(PackBasedObjectStore):
@@ -407,7 +408,9 @@ class DiskObjectStore(PackBasedObjectStore):
         newbasename = os.path.join(self.pack_dir, "pack-%s" % pack_sha)
         os.rename(temppath+".pack", newbasename+".pack")
         os.rename(temppath+".idx", newbasename+".idx")
-        self._add_known_pack(Pack(newbasename))
+        final_pack = Pack(newbasename)
+        self._add_known_pack(final_pack)
+        return final_pack
 
     def move_in_pack(self, path):
         """Move a specific file containing a pack into the pack directory.
@@ -424,7 +427,9 @@ class DiskObjectStore(PackBasedObjectStore):
         write_pack_index_v2(basename+".idx", entries, p.get_stored_checksum())
         p.close()
         os.rename(path, basename + ".pack")
-        self._add_known_pack(Pack(basename))
+        final_pack = Pack(basename)
+        self._add_known_pack(final_pack)
+        return final_pack
 
     def add_thin_pack(self):
         """Add a new thin pack to this object store.
@@ -438,7 +443,9 @@ class DiskObjectStore(PackBasedObjectStore):
             os.fsync(fd)
             f.close()
             if os.path.getsize(path) > 0:
-                self.move_in_thin_pack(path)
+                return self.move_in_thin_pack(path)
+            else:
+                return None
         return f, commit
 
     def add_pack(self):
@@ -453,7 +460,9 @@ class DiskObjectStore(PackBasedObjectStore):
             os.fsync(fd)
             f.close()
             if os.path.getsize(path) > 0:
-                self.move_in_pack(path)
+                return self.move_in_pack(path)
+            else:
+                return None
         return f, commit
 
     def add_object(self, obj):

+ 3 - 1
dulwich/pack.py

@@ -316,7 +316,6 @@ class PackIndex(object):
 
     def check(self):
         """Check that the stored checksum matches the actual checksum."""
-        # TODO: Check pack contents, too
         actual = self.calculate_checksum()
         stored = self.get_stored_checksum()
         if actual != stored:
@@ -1233,6 +1232,9 @@ class Pack(object):
         """
         self.index.check()
         self.data.check()
+        for obj in self.iterobjects():
+            obj.check()
+        # TODO: object connectivity checks
 
     def get_stored_checksum(self):
         return self.data.get_stored_checksum()

+ 17 - 21
dulwich/server.py

@@ -36,6 +36,7 @@ from dulwich.errors import (
     ApplyDeltaError,
     ChecksumMismatch,
     GitProtocolError,
+    ObjectFormatException,
     )
 from dulwich.misc import (
     make_sha,
@@ -643,48 +644,43 @@ class ReceivePackHandler(Handler):
     def _apply_pack(self, refs):
         f, commit = self.repo.object_store.add_thin_pack()
         all_exceptions = (IOError, OSError, ChecksumMismatch, ApplyDeltaError,
-                          AssertionError, socket.error, zlib.error)
+                          AssertionError, socket.error, zlib.error,
+                          ObjectFormatException)
         status = []
         unpack_error = None
         # TODO: more informative error messages than just the exception string
         try:
             PackStreamVerifier(self.proto, f).verify()
-        except all_exceptions, e:
-            unpack_error = str(e).replace('\n', '')
-        try:
-            commit()
-        except all_exceptions, e:
-            if not unpack_error:
-                unpack_error = str(e).replace('\n', '')
-
-        if unpack_error:
-            status.append(('unpack', unpack_error))
-        else:
+            p = commit()
+            if not p:
+                raise IOError('Failed to write pack')
+            p.check()
             status.append(('unpack', 'ok'))
+        except all_exceptions, e:
+            status.append(('unpack', str(e).replace('\n', '')))
+            # The pack may still have been moved in, but it may contain broken
+            # objects. We trust a later GC to clean it up.
 
         for oldsha, sha, ref in refs:
-            ref_error = None
+            ref_status = 'ok'
             try:
                 if sha == ZERO_SHA:
-                    if not self.has_capability('delete-refs'):
+                    if not delete_refs:
                         raise GitProtocolError(
                           'Attempted to delete refs without delete-refs '
                           'capability.')
                     try:
                         del self.repo.refs[ref]
                     except all_exceptions:
-                        ref_error = 'failed to delete'
+                        ref_status = 'failed to delete'
                 else:
                     try:
                         self.repo.refs[ref] = sha
                     except all_exceptions:
-                        ref_error = 'failed to write'
+                        ref_status = 'failed to write'
             except KeyError, e:
-                ref_error = 'bad ref'
-            if ref_error:
-                status.append((ref, ref_error))
-            else:
-                status.append((ref, 'ok'))
+                ref_status = 'bad ref'
+            status.append((ref, ref_status))
 
         return status