瀏覽代碼

Add basic repack implementation.

This doesn't have any of the knobs that ``git repack`` currently has,
it's fairly inefficient with memory, and it's not yet automatically
triggered.
Jelmer Vernooij 7 年之前
父節點
當前提交
5770657d64
共有 4 個文件被更改,包括 63 次插入1 次删除
  1. 3 0
      NEWS
  2. 29 0
      dulwich/object_store.py
  3. 8 0
      dulwich/pack.py
  4. 23 1
      dulwich/tests/test_object_store.py

+ 3 - 0
NEWS

@@ -20,6 +20,9 @@
   * Add ``dulwich.object_store.commit_tree_changes`` to incrementally
     commit changes to a tree structure. (Jelmer Vernooij)
 
+  * Add basic ``PackBasedObjectStore.repack`` method.
+    (Jelmer Vernooij, #296, #549)
+
 0.18.2	2017-08-01
 
  TEST FIXES

+ 29 - 0
dulwich/object_store.py

@@ -326,6 +326,9 @@ class PackBasedObjectStore(BaseObjectStore):
     def _remove_loose_object(self, sha):
         raise NotImplementedError(self._remove_loose_object)
 
+    def _remove_pack(self, name):
+        """Hook for removing a pack from this store.
+
+        This base implementation only raises ``NotImplementedError``.
+
+        NOTE(review): the parameter is called ``name`` here, but ``repack``
+        passes pack objects and ``DiskObjectStore._remove_pack`` calls the
+        same parameter ``pack`` -- confirm which is intended.
+        """
+        raise NotImplementedError(self._remove_pack)
+
     def pack_loose_objects(self):
         """Pack loose objects.
 
@@ -339,6 +342,28 @@ class PackBasedObjectStore(BaseObjectStore):
             self._remove_loose_object(obj.id)
         return len(objects)
 
+    def repack(self):
+        """Repack the packs in this repository.
+
+        Collects every loose object and every object held by the existing
+        packs, writes them out again through ``add_objects``, and then
+        removes the loose objects and the packs that were read.
+
+        Note that this implementation is fairly naive and currently keeps all
+        objects in memory while it repacks.
+
+        :return: Number of entries in the set of objects that was written
+        """
+        loose_objects = {
+            self._get_loose_object(sha) for sha in self._iter_loose_objects()}
+        objects = {(obj, None) for obj in loose_objects}
+        all_ids = {obj.id for obj in loose_objects}
+
+        # Remember which object ids each existing pack holds, so that the
+        # cleanup below can recognise a pack identical to the new one.
+        old_packs = []
+        for pack in self.packs:
+            pack_ids = set()
+            for obj in pack.iterobjects():
+                pack_ids.add(obj.id)
+                objects.add((obj, None))
+            all_ids |= pack_ids
+            old_packs.append((pack, pack_ids))
+
+        self.add_objects(objects)
+
+        for obj in loose_objects:
+            self._remove_loose_object(obj.id)
+        for pack, pack_ids in old_packs:
+            # A pack that already held every object has the same contents as
+            # the pack just written. If pack names are derived from the
+            # object ids (as DiskObjectStore._get_pack_basepath appears to
+            # do -- TODO confirm), both share one set of files, and removing
+            # the "old" pack would delete the new one. Keeping it is safe
+            # either way: no object is lost.
+            if pack_ids and pack_ids == all_ids:
+                continue
+            self._remove_pack(pack)
+        return len(objects)
+
     def __iter__(self):
         """Iterate over the SHAs that are present in this store."""
         iterables = (list(self.packs) + [self._iter_loose_objects()] +
@@ -532,6 +557,10 @@ class DiskObjectStore(PackBasedObjectStore):
     def _remove_loose_object(self, sha):
         os.remove(self._get_shafile_path(sha))
 
+    def _remove_pack(self, pack):
+        """Delete the files backing a pack.
+
+        Removes the pack's data file first and then its index file.
+
+        :param pack: Pack whose ``data.path`` and ``index.path`` are removed
+        """
+        data_path = pack.data.path
+        os.remove(data_path)
+        index_path = pack.index.path
+        os.remove(index_path)
+
     def _get_pack_basepath(self, entries):
         suffix = iter_sha1(entry[0] for entry in entries)
         # TODO: Handle self.pack_dir being bytes

+ 8 - 0
dulwich/pack.py

@@ -473,6 +473,10 @@ class FilePackIndex(PackIndex):
         else:
             self._contents, self._size = (contents, size)
 
+    @property
+    def path(self):
+        """Return the filename stored on this index (``self._filename``)."""
+        return self._filename
+
     def __eq__(self, other):
         # Quick optimization:
         if (isinstance(other, FilePackIndex) and
@@ -1009,6 +1013,10 @@ class PackData(object):
     def filename(self):
         return os.path.basename(self._filename)
 
+    @property
+    def path(self):
+        """Return ``self._filename`` as stored, without taking its basename.
+
+        ``filename`` applies ``os.path.basename`` to the same attribute; this
+        returns it unmodified.
+        """
+        return self._filename
+
     @classmethod
     def from_file(cls, file, size):
         return cls(str(file), file=file, size=size)

+ 23 - 1
dulwich/tests/test_object_store.py

@@ -272,11 +272,33 @@ class PackBasedObjectStoreTests(ObjectStoreTests):
         self.store.add_object(b1)
         b2 = make_object(Blob, data=b"more yummy data")
         self.store.add_object(b2)
-        self.assertEqual([], list(self.store.packs))
+        b3 = make_object(Blob, data=b"even more yummy data")
+        b4 = make_object(Blob, data=b"and more yummy data")
+        self.store.add_objects([(b3, None), (b4, None)])
+        self.assertEqual({b1.id, b2.id, b3.id, b4.id}, set(self.store))
+        self.assertEqual(1, len(self.store.packs))
         self.assertEqual(2, self.store.pack_loose_objects())
         self.assertNotEqual([], list(self.store.packs))
         self.assertEqual(0, self.store.pack_loose_objects())
 
+    def test_repack(self):
+        """Repacking two packs plus loose objects leaves a single pack."""
+        # Two objects added one at a time.
+        b1 = make_object(Blob, data=b"yummy data")
+        self.store.add_object(b1)
+        b2 = make_object(Blob, data=b"more yummy data")
+        self.store.add_object(b2)
+        # Two batches added via add_objects(); the pack count asserted below
+        # shows that each batch ends up in its own pack.
+        b3 = make_object(Blob, data=b"even more yummy data")
+        b4 = make_object(Blob, data=b"and more yummy data")
+        self.store.add_objects([(b3, None), (b4, None)])
+        b5 = make_object(Blob, data=b"and more data")
+        b6 = make_object(Blob, data=b"and some more data")
+        self.store.add_objects([(b5, None), (b6, None)])
+        self.assertEqual({b1.id, b2.id, b3.id, b4.id, b5.id, b6.id},
+                         set(self.store))
+        self.assertEqual(2, len(self.store.packs))
+        self.assertEqual(6, self.store.repack())
+        # All six objects must now live in exactly one consolidated pack;
+        # assertNotEqual(1, ...) would also pass if nothing was consolidated.
+        self.assertEqual(1, len(self.store.packs))
+        # No loose objects may be left behind after the repack.
+        self.assertEqual(0, self.store.pack_loose_objects())
+
 
 class DiskObjectStoreTests(PackBasedObjectStoreTests, TestCase):