Browse Source

Properly split out PackIndex1 and PackIndex2.

Jelmer Vernooij 16 years ago
parent
commit
c3b0f542b7
3 changed files with 99 additions and 44 deletions
  1. 2 1
      dulwich/object_store.py
  2. 91 37
      dulwich/pack.py
  3. 6 6
      dulwich/tests/test_pack.py

+ 2 - 1
dulwich/object_store.py

@@ -30,6 +30,7 @@ from dulwich.pack import (
     PackData, 
     PackData, 
     iter_sha1, 
     iter_sha1, 
     load_packs, 
     load_packs, 
+    load_pack_index,
     write_pack,
     write_pack,
     write_pack_data,
     write_pack_data,
     write_pack_index_v2,
     write_pack_index_v2,
@@ -145,7 +146,7 @@ class ObjectStore(object):
         temppath = os.path.join(self.pack_dir, 
         temppath = os.path.join(self.pack_dir, 
             sha_to_hex(urllib2.randombytes(20))+".temppack")
             sha_to_hex(urllib2.randombytes(20))+".temppack")
         write_pack(temppath, p.iterobjects(self.get_raw), len(p))
         write_pack(temppath, p.iterobjects(self.get_raw), len(p))
-        pack_sha = PackIndex(temppath+".idx").objects_sha1()
+        pack_sha = load_pack_index(temppath+".idx").objects_sha1()
         newbasename = os.path.join(self.pack_dir, "pack-%s" % pack_sha)
         newbasename = os.path.join(self.pack_dir, "pack-%s" % pack_sha)
         os.rename(temppath+".pack", newbasename+".pack")
         os.rename(temppath+".pack", newbasename+".pack")
         os.rename(temppath+".idx", newbasename+".idx")
         os.rename(temppath+".idx", newbasename+".idx")

+ 91 - 37
dulwich/pack.py

@@ -90,6 +90,7 @@ def read_zlib(data, offset, dec_size):
 
 
 
 
 def iter_sha1(iter):
 def iter_sha1(iter):
+    """Return the hexdigest of the SHA1 over a set of names."""
     sha1 = make_sha()
     sha1 = make_sha()
     for name in iter:
     for name in iter:
         sha1.update(name)
         sha1.update(name)
@@ -117,6 +118,20 @@ def simple_mmap(f, offset, size, access=mmap.ACCESS_READ):
         return mem, offset
         return mem, offset
 
 
 
 
+def load_pack_index(filename):
+    """Open a pack index file and return the matching index object.
+
+    The first four bytes are compared against the '\\377tOc' magic. When it
+    is present, the following big-endian 32-bit version number selects the
+    class; only version 2 is supported. A file without the magic is handed
+    to PackIndex1.
+
+    :param filename: Path of the index file to open.
+    :return: A PackIndex1 or PackIndex2 instance that receives the open file.
+    :raise KeyError: If the magic is present but the version is not 2.
+    """
+    # Index files are binary data, so avoid text-mode translation.
+    f = open(filename, 'rb')
+    if f.read(4) == '\377tOc':
+        version = struct.unpack(">L", f.read(4))[0]
+        if version == 2:
+            f.seek(0)
+            return PackIndex2(filename, file=f)
+        else:
+            # No index object takes over the handle on this path, so close
+            # it here instead of leaking it.
+            f.close()
+            raise KeyError("Unknown pack index format %d" % version)
+    else:
+        f.seek(0)
+        return PackIndex1(filename, file=f)
+
+
 class PackIndex(object):
 class PackIndex(object):
     """An index in to a packfile.
     """An index in to a packfile.
   
   
@@ -131,7 +146,7 @@ class PackIndex(object):
     the start and end offset and then bisect in to find if the value is present.
     the start and end offset and then bisect in to find if the value is present.
     """
     """
   
   
-    def __init__(self, filename):
+    def __init__(self, filename, file=None):
         """Create a pack index object.
         """Create a pack index object.
     
     
         Provide it with the name of the index file to consider, and it will map
         Provide it with the name of the index file to consider, and it will map
@@ -141,22 +156,15 @@ class PackIndex(object):
         # Take the size now, so it can be checked each time we map the file to
         # Take the size now, so it can be checked each time we map the file to
         # ensure that it hasn't changed.
         # ensure that it hasn't changed.
         self._size = os.path.getsize(filename)
         self._size = os.path.getsize(filename)
-        self._file = open(filename, 'r')
+        if file is None:
+            self._file = open(filename, 'r')
+        else:
+            self._file = file
         self._contents, map_offset = simple_mmap(self._file, 0, self._size)
         self._contents, map_offset = simple_mmap(self._file, 0, self._size)
         assert map_offset == 0
         assert map_offset == 0
-        if self._contents[:4] != '\377tOc':
-            self.version = 1
-            self._fan_out_table = self._read_fan_out_table(0)
-        else:
-            (self.version, ) = unpack_from(">L", self._contents, 4)
-            assert self.version in (2,), "Version was %d" % self.version
-            self._fan_out_table = self._read_fan_out_table(8)
-            self._name_table_offset = 8 + 0x100 * 4
-            self._crc32_table_offset = self._name_table_offset + 20 * len(self)
-            self._pack_offset_table_offset = self._crc32_table_offset + 4 * len(self)
   
   
     def __eq__(self, other):
     def __eq__(self, other):
-        if type(self) != type(other):
+        if not isinstance(other, PackIndex):
             return False
             return False
     
     
         if self._fan_out_table != other._fan_out_table:
         if self._fan_out_table != other._fan_out_table:
@@ -179,34 +187,19 @@ class PackIndex(object):
     
     
         :return: Tuple with object name (SHA), offset in pack file and 
         :return: Tuple with object name (SHA), offset in pack file and 
               CRC32 checksum (if known)."""
               CRC32 checksum (if known)."""
-        if self.version == 1:
+        raise NotImplementedError(self._unpack_entry)
-            (offset, name) = unpack_from(">L20s", self._contents, 
-                (0x100 * 4) + (i * 24))
-            return (name, offset, None)
-        else:
-            return (self._unpack_name(i), self._unpack_offset(i), 
-                    self._unpack_crc32_checksum(i))
   
   
     def _unpack_name(self, i):
     def _unpack_name(self, i):
-        if self.version == 1:
+        """Unpack the i-th name from the index file."""
-            offset = (0x100 * 4) + (i * 24) + 4
+        raise NotImplementedError(self._unpack_name)
-        else:
-            offset = self._name_table_offset + i * 20
-        return self._contents[offset:offset+20]
   
   
     def _unpack_offset(self, i):
     def _unpack_offset(self, i):
-        if self.version == 1:
+        """Unpack the i-th object offset from the index file."""
-            offset = (0x100 * 4) + (i * 24)
+        raise NotImplementedError(self._unpack_offset)
-        else:
+
-            offset = self._pack_offset_table_offset + i * 4
-        return unpack_from(">L", self._contents, offset)[0]
-  
     def _unpack_crc32_checksum(self, i):
     def _unpack_crc32_checksum(self, i):
-        if self.version == 1:
+        """Unpack the crc32 checksum for the i-th object from the index file."""
-            return None
+        raise NotImplementedError(self._unpack_crc32_checksum)
-        else:
-            return unpack_from(">L", self._contents, 
-                                      self._crc32_table_offset + i * 4)[0]
   
   
     def __iter__(self):
     def __iter__(self):
         return imap(sha_to_hex, self._itersha())
         return imap(sha_to_hex, self._itersha())
@@ -216,6 +209,10 @@ class PackIndex(object):
             yield self._unpack_name(i)
             yield self._unpack_name(i)
   
   
     def objects_sha1(self):
     def objects_sha1(self):
+        """Return the hex SHA1 over all the shas of all objects in this pack.
+        
+        :note: This is used for the filename of the pack.
+        """
         return iter_sha1(self._itersha())
         return iter_sha1(self._itersha())
   
   
     def iterentries(self):
     def iterentries(self):
@@ -287,6 +284,63 @@ class PackIndex(object):
         return None
         return None
 
 
 
 
+class PackIndex1(PackIndex):
+    """Version 1 Pack Index.
+
+    As read by the accessors below, entry i is a 24-byte record located at
+    (0x100 * 4) + (i * 24): a 4-byte big-endian pack offset followed by the
+    20-byte object name. This format carries no CRC32 checksums.
+    """
+
+    def __init__(self, filename, file=None):
+        """Create a v1 index for filename, passing file on to PackIndex."""
+        PackIndex.__init__(self, filename, file)
+        self.version = 1
+        # The fan-out table is read starting at byte 0 of the file.
+        self._fan_out_table = self._read_fan_out_table(0)
+
+    def _unpack_entry(self, i):
+        """Return (name, offset, None) for the i-th entry.
+
+        The third element is always None because no checksum is stored.
+        """
+        (offset, name) = unpack_from(">L20s", self._contents, 
+            (0x100 * 4) + (i * 24))
+        return (name, offset, None)
+ 
+    def _unpack_name(self, i):
+        """Return the 20-byte name of the i-th entry."""
+        # Skip the 4-byte offset that precedes the name in each record.
+        offset = (0x100 * 4) + (i * 24) + 4
+        return self._contents[offset:offset+20]
+  
+    def _unpack_offset(self, i):
+        """Return the pack offset of the i-th entry."""
+        offset = (0x100 * 4) + (i * 24)
+        return unpack_from(">L", self._contents, offset)[0]
+  
+    def _unpack_crc32_checksum(self, i):
+        """Return None; see the comment below."""
+        # Not stored in v1 index files
+        return None 
+  
+
+class PackIndex2(PackIndex):
+    """Version 2 Pack Index.
+
+    As computed in __init__, the fan-out table is read from byte 8 (after the
+    4-byte magic and 4-byte version), followed by three consecutive tables:
+    20-byte names, 4-byte CRC32 checksums and 4-byte pack offsets, each
+    sized by len(self) (presumably the number of entries -- defined on
+    PackIndex).
+    """
+
+    def __init__(self, filename, file=None):
+        """Create a v2 index for filename, passing file on to PackIndex."""
+        PackIndex.__init__(self, filename, file)
+        # NOTE(review): these checks are asserts, so they are skipped when
+        # Python runs with -O.
+        assert self._contents[:4] == '\377tOc', "Not a v2 pack index file"
+        (self.version, ) = unpack_from(">L", self._contents, 4)
+        assert self.version == 2, "Version was %d" % self.version
+        self._fan_out_table = self._read_fan_out_table(8)
+        # Table offsets are derived once here; len(self) is used after the
+        # fan-out table has been read.
+        self._name_table_offset = 8 + 0x100 * 4
+        self._crc32_table_offset = self._name_table_offset + 20 * len(self)
+        self._pack_offset_table_offset = self._crc32_table_offset + 4 * len(self)
+
+    def _unpack_entry(self, i):
+        """Return (name, offset, crc32 checksum) for the i-th entry."""
+        return (self._unpack_name(i), self._unpack_offset(i), 
+                self._unpack_crc32_checksum(i))
+ 
+    def _unpack_name(self, i):
+        """Return the 20-byte name of the i-th entry from the name table."""
+        offset = self._name_table_offset + i * 20
+        return self._contents[offset:offset+20]
+  
+    def _unpack_offset(self, i):
+        """Return the i-th big-endian 32-bit value of the pack offset table."""
+        offset = self._pack_offset_table_offset + i * 4
+        return unpack_from(">L", self._contents, offset)[0]
+  
+    def _unpack_crc32_checksum(self, i):
+        """Return the i-th big-endian 32-bit value of the CRC32 table."""
+        return unpack_from(">L", self._contents, 
+                          self._crc32_table_offset + i * 4)[0]
+  
+
+
 def read_pack_header(f):
 def read_pack_header(f):
     header = f.read(12)
     header = f.read(12)
     assert header[:4] == "PACK"
     assert header[:4] == "PACK"
@@ -846,7 +900,7 @@ class Pack(object):
     @property
     @property
     def idx(self):
     def idx(self):
         if self._idx is None:
         if self._idx is None:
-            self._idx = PackIndex(self._idx_path)
+            self._idx = load_pack_index(self._idx_path)
         return self._idx
         return self._idx
 
 
     def close(self):
     def close(self):

+ 6 - 6
dulwich/tests/test_pack.py

@@ -25,10 +25,10 @@ from dulwich.objects import (
     )
     )
 from dulwich.pack import (
 from dulwich.pack import (
     Pack,
     Pack,
-    PackIndex,
     PackData,
     PackData,
     apply_delta,
     apply_delta,
     create_delta,
     create_delta,
+    load_pack_index,
     hex_to_sha,
     hex_to_sha,
     read_zlib,
     read_zlib,
     sha_to_hex,
     sha_to_hex,
@@ -50,7 +50,7 @@ class PackTests(unittest.TestCase):
   
   
     def get_pack_index(self, sha):
     def get_pack_index(self, sha):
         """Returns a PackIndex from the datadir with the given sha"""
         """Returns a PackIndex from the datadir with the given sha"""
-        return PackIndex(os.path.join(self.datadir, 'pack-%s.idx' % sha))
+        return load_pack_index(os.path.join(self.datadir, 'pack-%s.idx' % sha))
   
   
     def get_pack_data(self, sha):
     def get_pack_data(self, sha):
         """Returns a PackData object from the datadir with the given sha"""
         """Returns a PackData object from the datadir with the given sha"""
@@ -144,14 +144,14 @@ class TestPackData(PackTests):
     def test_create_index_v1(self):
     def test_create_index_v1(self):
         p = self.get_pack_data(pack1_sha)
         p = self.get_pack_data(pack1_sha)
         p.create_index_v1("v1test.idx")
         p.create_index_v1("v1test.idx")
-        idx1 = PackIndex("v1test.idx")
+        idx1 = load_pack_index("v1test.idx")
         idx2 = self.get_pack_index(pack1_sha)
         idx2 = self.get_pack_index(pack1_sha)
         self.assertEquals(idx1, idx2)
         self.assertEquals(idx1, idx2)
   
   
     def test_create_index_v2(self):
     def test_create_index_v2(self):
         p = self.get_pack_data(pack1_sha)
         p = self.get_pack_data(pack1_sha)
         p.create_index_v2("v2test.idx")
         p.create_index_v2("v2test.idx")
-        idx1 = PackIndex("v2test.idx")
+        idx1 = load_pack_index("v2test.idx")
         idx2 = self.get_pack_index(pack1_sha)
         idx2 = self.get_pack_index(pack1_sha)
         self.assertEquals(idx1, idx2)
         self.assertEquals(idx1, idx2)
 
 
@@ -229,7 +229,7 @@ class BaseTestPackIndexWriting(object):
     def test_empty(self):
     def test_empty(self):
         pack_checksum = 'r\x19\x80\xe8f\xaf\x9a_\x93\xadgAD\xe1E\x9b\x8b\xa3\xe7\xb7'
         pack_checksum = 'r\x19\x80\xe8f\xaf\x9a_\x93\xadgAD\xe1E\x9b\x8b\xa3\xe7\xb7'
         self._write_fn("empty.idx", [], pack_checksum)
         self._write_fn("empty.idx", [], pack_checksum)
-        idx = PackIndex("empty.idx")
+        idx = load_pack_index("empty.idx")
         self.assertTrue(idx.check())
         self.assertTrue(idx.check())
         self.assertEquals(idx.get_pack_checksum(), pack_checksum)
         self.assertEquals(idx.get_pack_checksum(), pack_checksum)
         self.assertEquals(0, len(idx))
         self.assertEquals(0, len(idx))
@@ -239,7 +239,7 @@ class BaseTestPackIndexWriting(object):
         my_entries = [('og\x0c\x0f\xb5?\x94cv\x0br\x95\xfb\xb8\x14\xe9e\xfb \xc8', 178, 42)]
         my_entries = [('og\x0c\x0f\xb5?\x94cv\x0br\x95\xfb\xb8\x14\xe9e\xfb \xc8', 178, 42)]
         my_entries.sort()
         my_entries.sort()
         self._write_fn("single.idx", my_entries, pack_checksum)
         self._write_fn("single.idx", my_entries, pack_checksum)
-        idx = PackIndex("single.idx")
+        idx = load_pack_index("single.idx")
         self.assertEquals(idx.version, self._expected_version)
         self.assertEquals(idx.version, self._expected_version)
         self.assertTrue(idx.check())
         self.assertTrue(idx.check())
         self.assertEquals(idx.get_pack_checksum(), pack_checksum)
         self.assertEquals(idx.get_pack_checksum(), pack_checksum)