2
0
Эх сурвалжийг харах

Fix crc32 checksums stored in indexes.

Jelmer Vernooij 16 жил өмнө
parent
commit
200721ba8a

+ 2 - 1
dulwich/object_store.py

@@ -120,7 +120,8 @@ class ObjectStore(object):
         :param path: Path to the pack file.
         """
         p = PackData(path)
-        temppath = os.path.join(self.pack_dir(), sha_to_hex(urllib2.randombytes(20))+".temppack")
+        temppath = os.path.join(self.pack_dir(), 
+            sha_to_hex(urllib2.randombytes(20))+".temppack")
         write_pack(temppath, p.iterobjects(self.get_raw), len(p))
         pack_sha = PackIndex(temppath+".idx").objects_sha1()
         os.rename(temppath+".pack", 

+ 0 - 3
dulwich/objects.py

@@ -160,9 +160,6 @@ class ShaFile(object):
     def _header(self):
         return "%s %lu\0" % (self._type, len(self._text))
   
-    def crc32(self):
-        return zlib.crc32(self._text) & 0xffffffff
-  
     def sha(self):
         """The SHA1 object that is the name of this object."""
         ressha = sha.new()

+ 20 - 13
dulwich/pack.py

@@ -439,7 +439,8 @@ class PackData(object):
         for i in range(len(self)):
             map = simple_mmap(f, offset, self._size-offset)
             (type, obj, total_size) = unpack_object(map)
-            yield offset, type, obj
+            crc32 = zlib.crc32(map[:total_size]) & 0xffffffff
+            yield offset, type, obj, crc32
             offset += total_size
         f.close()
   
@@ -461,7 +462,7 @@ class PackData(object):
             raise Postpone, (sha, )
         todo = list(self.iterobjects())
         while todo:
-            (offset, type, obj) = todo.pop(0)
+            (offset, type, obj, crc32) = todo.pop(0)
             at[offset] = (type, obj)
             assert isinstance(offset, int)
             assert isinstance(type, int)
@@ -475,7 +476,7 @@ class PackData(object):
                 shafile = ShaFile.from_raw_string(type, obj)
                 sha = shafile.sha().digest()
                 found[sha] = (type, obj)
-                yield sha, offset, shafile.crc32()
+                yield sha, offset, crc32
                 todo += postponed.get(sha, [])
         if postponed:
             raise KeyError([sha_to_hex(h) for h in postponed.keys()])
@@ -550,8 +551,10 @@ def write_pack_object(f, type, object):
 
     :param f: File to write to
     :param object: Object to write
+    :return: Tuple with offset at which the object was written, and crc32
     """
     ret = f.tell()
+    packed_data_hdr = ""
     if type == 6: # offset delta
         (delta_base_offset, object) = object
     elif type == 7: # ref delta
@@ -560,10 +563,10 @@ def write_pack_object(f, type, object):
     c = (type << 4) | (size & 15)
     size >>= 4
     while size:
-        f.write(chr(c | 0x80))
+        packed_data_hdr += (chr(c | 0x80))
         c = size & 0x7f
         size >>= 7
-    f.write(chr(c))
+    packed_data_hdr += chr(c)
     if type == 6: # offset delta
         ret = [delta_base_offset & 0x7f]
         delta_base_offset >>= 7
@@ -571,12 +574,13 @@ def write_pack_object(f, type, object):
             delta_base_offset -= 1
             ret.insert(0, 0x80 | (delta_base_offset & 0x7f))
             delta_base_offset >>= 7
-        f.write("".join([chr(x) for x in ret]))
+        packed_data_hdr += "".join([chr(x) for x in ret])
     elif type == 7: # ref delta
         assert len(basename) == 20
-        f.write(basename)
-    f.write(zlib.compress(object))
-    return f.tell()
+        packed_data_hdr += basename
+    packed_data = packed_data_hdr + zlib.compress(object)
+    f.write(packed_data)
+    return (f.tell(), (zlib.crc32(packed_data) & 0xffffffff))
 
 
 def write_pack(filename, objects, num_objects):
@@ -618,7 +622,6 @@ def write_pack_data(f, objects, num_objects, window=10):
     f.write(struct.pack(">L", num_objects)) # Number of objects in pack
     for o, path in recency:
         sha1 = o.sha().digest()
-        crc32 = o.crc32()
         orig_t, raw = o.as_raw_string()
         winner = raw
         t = orig_t
@@ -631,7 +634,7 @@ def write_pack_data(f, objects, num_objects, window=10):
         #    if len(delta) < len(winner):
         #        winner = delta
         #        t = 6 if magic[i][2] == 1 else 7
-        offset = write_pack_object(f, t, winner)
+        offset, crc32 = write_pack_object(f, t, winner)
         entries.append((sha1, offset, crc32))
     return entries, f.write_sha()
 
@@ -859,7 +862,11 @@ class Pack(object):
         return iter(self.idx)
 
     def check(self):
-        return self.idx.check() and self.data.check()
+        if not self.idx.check():
+            return False
+        if not self.data.check():
+            return False
+        return True
 
     def get_stored_checksum(self):
         return self.data.get_stored_checksum()
@@ -889,7 +896,7 @@ class Pack(object):
         if get_raw is None:
             def get_raw(x):
                 raise KeyError(x)
-        for offset, type, obj in self.data.iterobjects():
+        for offset, type, obj, crc32 in self.data.iterobjects():
             assert isinstance(offset, int)
             yield ShaFile.from_raw_string(
                     *resolve_object(offset, type, obj, 

+ 2 - 2
dulwich/tests/test_pack.py

@@ -135,11 +135,11 @@ class TestPackData(PackTests):
   
     def test_iterobjects(self):
         p = self.get_pack_data(pack1_sha)
-        self.assertEquals([(12, 1, 'tree b2a2766a2879c209ab1176e7e778b81ae422eeaa\nauthor James Westby <jw+debian@jameswestby.net> 1174945067 +0100\ncommitter James Westby <jw+debian@jameswestby.net> 1174945067 +0100\n\nTest commit\n'), (138, 2, '100644 a\x00og\x0c\x0f\xb5?\x94cv\x0br\x95\xfb\xb8\x14\xe9e\xfb \xc8'), (178, 3, 'test 1\n')], list(p.iterobjects()))
+        self.assertEquals([(12, 1, 'tree b2a2766a2879c209ab1176e7e778b81ae422eeaa\nauthor James Westby <jw+debian@jameswestby.net> 1174945067 +0100\ncommitter James Westby <jw+debian@jameswestby.net> 1174945067 +0100\n\nTest commit\n', 3775879613L), (138, 2, '100644 a\x00og\x0c\x0f\xb5?\x94cv\x0br\x95\xfb\xb8\x14\xe9e\xfb \xc8', 912998690L), (178, 3, 'test 1\n', 1373561701L)], list(p.iterobjects()))
   
     def test_iterentries(self):
         p = self.get_pack_data(pack1_sha)
-        self.assertEquals(set([('og\x0c\x0f\xb5?\x94cv\x0br\x95\xfb\xb8\x14\xe9e\xfb \xc8', 178, 2576920631), ('\xb2\xa2vj(y\xc2\t\xab\x11v\xe7\xe7x\xb8\x1a\xe4"\xee\xaa', 138, 3393920822), ('\xf1\x8f\xaa\x16S\x1a\xc5p\xa3\xfd\xc8\xc7\xca\x16h%H\xda\xfd\x12', 12, 1185722901)]), set(p.iterentries()))
+        self.assertEquals(set([('og\x0c\x0f\xb5?\x94cv\x0br\x95\xfb\xb8\x14\xe9e\xfb \xc8', 178, 1373561701L), ('\xb2\xa2vj(y\xc2\t\xab\x11v\xe7\xe7x\xb8\x1a\xe4"\xee\xaa', 138, 912998690L), ('\xf1\x8f\xaa\x16S\x1a\xc5p\xa3\xfd\xc8\xc7\xca\x16h%H\xda\xfd\x12', 12, 3775879613L)]), set(p.iterentries()))
   
     def test_create_index_v1(self):
         p = self.get_pack_data(pack1_sha)