Prechádzať zdrojové kódy

Merge support for large pack index files.

Jelmer Vernooij pred 12 rokmi
rodič
commit
30671cdde4
3 zmenené súbory, 48 pridaní a 8 odobraní
  1. 2 0
      NEWS
  2. 19 8
      dulwich/pack.py
  3. 27 0
      dulwich/tests/test_pack.py

+ 2 - 0
NEWS

@@ -13,6 +13,8 @@
   * HTTPGitApplication now takes an optional
     `fallback_app` argument. (Jonas Haag, issue #67)
 
+  * Support for large pack index files. (Jameson Nash)
+
  TESTING
 
   * Make index entry tests a little bit less strict, to cope with

+ 19 - 8
dulwich/pack.py

@@ -615,6 +615,8 @@ class PackIndex2(FilePackIndex):
         self._crc32_table_offset = self._name_table_offset + 20 * len(self)
         self._pack_offset_table_offset = (self._crc32_table_offset +
                                           4 * len(self))
+        self._pack_offset_largetable_offset = (self._pack_offset_table_offset +
+                                          4 * len(self))
 
     def _unpack_entry(self, i):
         return (self._unpack_name(i), self._unpack_offset(i),
@@ -626,7 +628,11 @@ class PackIndex2(FilePackIndex):
 
     def _unpack_offset(self, i):
         offset = self._pack_offset_table_offset + i * 4
-        return unpack_from('>L', self._contents, offset)[0]
+        offset = unpack_from('>L', self._contents, offset)[0]
+        if offset & (2**31):
+            offset = self._pack_offset_largetable_offset + (offset&(2**31-1)) * 8L
+            offset = unpack_from('>Q', self._contents, offset)[0]
+        return offset
 
     def _unpack_crc32_checksum(self, i):
         return unpack_from('>L', self._contents,
@@ -1036,8 +1042,8 @@ class PackData(object):
         if type == OFS_DELTA:
             (delta_offset, delta) = obj
             # TODO: clean up asserts and replace with nicer error messages
-            assert isinstance(offset, int)
-            assert isinstance(delta_offset, int)
+            assert isinstance(offset, int) or isinstance(offset, long)
+            assert isinstance(delta_offset, int) or isinstance(offset, long)
             base_offset = offset-delta_offset
             type, base_obj = self.get_object_at(base_offset)
             assert isinstance(type, int)
@@ -1560,6 +1566,8 @@ def write_pack_index_v1(f, entries, pack_checksum):
         f.write(struct.pack('>L', fan_out_table[i]))
         fan_out_table[i+1] += fan_out_table[i]
     for (name, offset, entry_checksum) in entries:
+        if not (offset <= 0xffffffff):
+            raise TypeError("pack format 1 only supports offsets < 2Gb")
         f.write(struct.pack('>L20s', offset, name))
     assert len(pack_checksum) == 20
     f.write(pack_checksum)
@@ -1707,6 +1715,7 @@ def write_pack_index_v2(f, entries, pack_checksum):
     for (name, offset, entry_checksum) in entries:
         fan_out_table[ord(name[0])] += 1
     # Fan-out table
+    largetable = []
     for i in range(0x100):
         f.write(struct.pack('>L', fan_out_table[i]))
         fan_out_table[i+1] += fan_out_table[i]
@@ -1715,9 +1724,13 @@ def write_pack_index_v2(f, entries, pack_checksum):
     for (name, offset, entry_checksum) in entries:
         f.write(struct.pack('>L', entry_checksum))
     for (name, offset, entry_checksum) in entries:
-        # FIXME: handle if MSBit is set in offset
-        f.write(struct.pack('>L', offset))
-    # FIXME: handle table for pack files > 8 Gb
+        if offset < 2**31:
+            f.write(struct.pack('>L', offset))
+        else:
+            f.write(struct.pack('>L', 2**31 + len(largetable)))
+            largetable.append(offset)
+    for offset in largetable:
+        f.write(struct.pack('>Q', offset))
     assert len(pack_checksum) == 20
     f.write(pack_checksum)
     return f.write_sha()
@@ -1828,8 +1841,6 @@ class Pack(object):
     def get_raw(self, sha1):
         offset = self.index.object_index(sha1)
         obj_type, obj = self.data.get_object_at(offset)
-        if type(offset) is long:
-          offset = int(offset)
         type_num, chunks = self.data.resolve_object(offset, obj_type, obj)
         return type_num, ''.join(chunks)
 

+ 27 - 0
dulwich/tests/test_pack.py

@@ -476,6 +476,30 @@ class BaseTestPackIndexWriting(object):
         self.assertEqual(idx.get_pack_checksum(), pack_checksum)
         self.assertEqual(0, len(idx))
 
+    def test_large(self):
+        entry1_sha = hex_to_sha('4e6388232ec39792661e2e75db8fb117fc869ce6')
+        entry2_sha = hex_to_sha('e98f071751bd77f59967bfa671cd2caebdccc9a2')
+        entries = [(entry1_sha, 0xf2972d0830529b87, 24),
+                   (entry2_sha, (~0xf2972d0830529b87)&(2**64-1), 92)]
+        if not self._supports_large:
+            self.assertRaises(TypeError, self.index, 'single.idx',
+                entries, pack_checksum)
+            return
+        idx = self.index('single.idx', entries, pack_checksum)
+        self.assertEqual(idx.get_pack_checksum(), pack_checksum)
+        self.assertEqual(2, len(idx))
+        actual_entries = list(idx.iterentries())
+        self.assertEqual(len(entries), len(actual_entries))
+        for mine, actual in zip(entries, actual_entries):
+            my_sha, my_offset, my_crc = mine
+            actual_sha, actual_offset, actual_crc = actual
+            self.assertEqual(my_sha, actual_sha)
+            self.assertEqual(my_offset, actual_offset)
+            if self._has_crc32_checksum:
+                self.assertEqual(my_crc, actual_crc)
+            else:
+                self.assertTrue(actual_crc is None)
+
     def test_single(self):
         entry_sha = hex_to_sha('6f670c0fb53f9463760b7295fbb814e965fb20c8')
         my_entries = [(entry_sha, 178, 42)]
@@ -525,6 +549,7 @@ class TestMemoryIndexWriting(TestCase, BaseTestPackIndexWriting):
     def setUp(self):
         TestCase.setUp(self)
         self._has_crc32_checksum = True
+        self._supports_large = True
 
     def index(self, filename, entries, pack_checksum):
         return MemoryPackIndex(entries, pack_checksum)
@@ -540,6 +565,7 @@ class TestPackIndexWritingv1(TestCase, BaseTestFilePackIndexWriting):
         BaseTestFilePackIndexWriting.setUp(self)
         self._has_crc32_checksum = False
         self._expected_version = 1
+        self._supports_large = False
         self._write_fn = write_pack_index_v1
 
     def tearDown(self):
@@ -553,6 +579,7 @@ class TestPackIndexWritingv2(TestCase, BaseTestFilePackIndexWriting):
         TestCase.setUp(self)
         BaseTestFilePackIndexWriting.setUp(self)
         self._has_crc32_checksum = True
+        self._supports_large = True
         self._expected_version = 2
         self._write_fn = write_pack_index_v2