Răsfoiți Sursa

Revert "Factor out varint"

This reverts commit baa23738f8ec8253c484910cc8c8de95eb791509.
Jelmer Vernooij 2 luni în urmă
părinte
comite
68815901bd
5 fișiere modificate, cu 54 adăugiri și 232 ștergeri
  1. 51 3
      dulwich/index.py
  2. 0 105
      dulwich/varint.py
  3. 0 1
      tests/__init__.py
  4. 3 3
      tests/test_index.py
  5. 0 120
      tests/test_varint.py

+ 51 - 3
dulwich/index.py

@@ -48,7 +48,6 @@ from .objects import (
     sha_to_hex,
 )
 from .pack import ObjectContainer, SHA1Reader, SHA1Writer
-from .varint import decode_varint, encode_varint
 
 # 2-bit stage (during merge)
 FLAG_STAGEMASK = 0x3000
@@ -77,6 +76,55 @@ EOIE_EXTENSION = b"EOIE"
 IEOT_EXTENSION = b"IEOT"
 
 
+def _encode_varint(value: int) -> bytes:
+    """Encode an integer using variable-width encoding.
+
+    Used for index v4 path compression; low 7-bit group first, high bit continues.
+    NOTE(review): differs from Git's OFS_DELTA offset varint (MSB-first, +1 bias).
+
+    Args:
+      value: Integer to encode
+    Returns:
+      Encoded bytes
+    """
+    if value == 0:
+        return b"\x00"
+
+    result = []
+    while value > 0:
+        byte = value & 0x7F  # Take lower 7 bits
+        value >>= 7
+        if value > 0:
+            byte |= 0x80  # Set continuation bit
+        result.append(byte)
+
+    return bytes(result)
+
+
+def _decode_varint(data: bytes, offset: int = 0) -> tuple[int, int]:
+    """Decode a variable-width encoded integer.
+
+    Args:
+      data: Bytes to decode from
+      offset: Starting offset in data
+    Returns:
+      tuple of (decoded_value, new_offset)
+    """
+    value = 0
+    shift = 0
+    pos = offset
+
+    while pos < len(data):
+        byte = data[pos]
+        pos += 1
+        value |= (byte & 0x7F) << shift
+        shift += 7
+        if not (byte & 0x80):  # No continuation bit
+            break
+
+    return value, pos
+
+
 def _compress_path(path: bytes, previous_path: bytes) -> bytes:
     """Compress a path relative to the previous path for index version 4.
 
@@ -104,7 +152,7 @@ def _compress_path(path: bytes, previous_path: bytes) -> bytes:
     suffix = path[common_len:]
 
     # Encode: varint(remove_len) + suffix + NUL
-    return encode_varint(remove_len) + suffix + b"\x00"
+    return _encode_varint(remove_len) + suffix + b"\x00"
 
 
 def _decompress_path(
@@ -120,7 +168,7 @@ def _decompress_path(
       tuple of (decompressed_path, new_offset)
     """
     # Decode the number of bytes to remove from previous path
-    remove_len, new_offset = decode_varint(data, offset)
+    remove_len, new_offset = _decode_varint(data, offset)
 
     # Find the NUL terminator for the suffix
     suffix_start = new_offset

+ 0 - 105
dulwich/varint.py

@@ -1,105 +0,0 @@
-# varint.py -- Variable-width integer encoding/decoding
-# Copyright (C) 2008-2013 Jelmer Vernooij <jelmer@jelmer.uk>
-#
-# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
-# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
-# General Public License as public by the Free Software Foundation; version 2.0
-# or (at your option) any later version. You can redistribute it and/or
-# modify it under the terms of either of these two licenses.
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# You should have received a copy of the licenses; if not, see
-# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
-# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
-# License, Version 2.0.
-#
-
-"""Variable-width integer encoding/decoding.
-
-This format is used in multiple places in Git:
-- Git index file format version 4 for path compression
-- Git pack files for OFS_DELTA entries
-- Git reftable format for various fields
-"""
-
-from typing import BinaryIO, Optional
-
-
-def encode_varint(value: int) -> bytes:
-    """Encode an integer using variable-width encoding.
-
-    Same format as used for OFS_DELTA pack entries and index v4 path compression.
-    Uses 7 bits per byte, with the high bit indicating continuation.
-
-    Args:
-      value: Integer to encode
-    Returns:
-      Encoded bytes
-    """
-    if value == 0:
-        return b"\x00"
-
-    result = []
-    while value > 0:
-        byte = value & 0x7F  # Take lower 7 bits
-        value >>= 7
-        if value > 0:
-            byte |= 0x80  # Set continuation bit
-        result.append(byte)
-
-    return bytes(result)
-
-
-def decode_varint(data: bytes, offset: int = 0) -> tuple[int, int]:
-    """Decode a variable-width encoded integer from bytes.
-
-    Args:
-      data: Bytes to decode from
-      offset: Starting offset in data
-    Returns:
-      tuple of (decoded_value, new_offset)
-    """
-    value = 0
-    shift = 0
-    pos = offset
-
-    while pos < len(data):
-        byte = data[pos]
-        pos += 1
-        value |= (byte & 0x7F) << shift
-        shift += 7
-        if not (byte & 0x80):  # No continuation bit
-            break
-
-    return value, pos
-
-
-def decode_varint_from_stream(stream: BinaryIO) -> Optional[int]:
-    """Decode a variable-width encoded integer from a stream.
-
-    Args:
-      stream: Stream to read from
-    Returns:
-      Decoded integer, or None if end of stream
-    """
-    value = 0
-    shift = 0
-
-    while True:
-        byte_data = stream.read(1)
-        if not byte_data:
-            return None  # End of stream
-
-        byte = byte_data[0]
-        value |= (byte & 0x7F) << shift
-        shift += 7
-
-        if not (byte & 0x80):  # No continuation bit
-            break
-
-    return value

+ 0 - 1
tests/__init__.py

@@ -155,7 +155,6 @@ def self_test_suite():
         "stash",
         "submodule",
         "utils",
-        "varint",
         "walk",
         "web",
     ]

+ 3 - 3
tests/test_index.py

@@ -1430,13 +1430,13 @@ class TestPathPrefixCompression(TestCase):
 
     def test_varint_encoding_decoding(self):
         """Test variable-width integer encoding and decoding."""
-        from dulwich.varint import decode_varint, encode_varint
+        from dulwich.index import _decode_varint, _encode_varint
 
         test_values = [0, 1, 127, 128, 255, 256, 16383, 16384, 65535, 65536]
 
         for value in test_values:
-            encoded = encode_varint(value)
-            decoded, _ = decode_varint(encoded, 0)
+            encoded = _encode_varint(value)
+            decoded, _ = _decode_varint(encoded, 0)
             self.assertEqual(value, decoded, f"Failed for value {value}")
 
     def test_path_compression_simple(self):

+ 0 - 120
tests/test_varint.py

@@ -1,120 +0,0 @@
-"""Tests for variable-width integer encoding/decoding."""
-
-import unittest
-from io import BytesIO
-
-from dulwich.varint import (
-    decode_varint,
-    decode_varint_from_stream,
-    encode_varint,
-)
-
-
-class TestVarint(unittest.TestCase):
-    """Test variable-width integer encoding and decoding."""
-
-    def test_encode_decode_basic(self):
-        """Test basic varint encoding/decoding."""
-        test_values = [0, 1, 127, 128, 255, 256, 16383, 16384, 65535, 65536]
-
-        for value in test_values:
-            encoded = encode_varint(value)
-            decoded, new_offset = decode_varint(encoded, 0)
-            self.assertEqual(value, decoded, f"Failed for value {value}")
-            self.assertEqual(
-                len(encoded), new_offset, f"Offset mismatch for value {value}"
-            )
-
-    def test_encode_decode_stream(self):
-        """Test varint encoding/decoding with streams."""
-        test_values = [0, 1, 127, 128, 255, 256, 16383, 16384, 65535, 65536]
-
-        for value in test_values:
-            encoded = encode_varint(value)
-            stream = BytesIO(encoded)
-            decoded = decode_varint_from_stream(stream)
-            self.assertEqual(
-                value, decoded, f"Failed for stream decode of value {value}"
-            )
-
-    def test_multiple_varints(self):
-        """Test encoding/decoding multiple varints in sequence."""
-        values = [42, 127, 128, 1000, 0]
-
-        # Encode all values
-        encoded_data = b""
-        for value in values:
-            encoded_data += encode_varint(value)
-
-        # Decode all values using byte offset
-        offset = 0
-        decoded_values = []
-        for _ in values:
-            decoded, offset = decode_varint(encoded_data, offset)
-            decoded_values.append(decoded)
-
-        self.assertEqual(values, decoded_values)
-
-        # Decode all values using stream
-        stream = BytesIO(encoded_data)
-        decoded_values_stream = []
-        for _ in values:
-            decoded = decode_varint_from_stream(stream)
-            self.assertIsNotNone(decoded)
-            decoded_values_stream.append(decoded)
-
-        self.assertEqual(values, decoded_values_stream)
-
-    def test_stream_end_of_data(self):
-        """Test that stream decode returns None at end of data."""
-        stream = BytesIO(b"")
-        result = decode_varint_from_stream(stream)
-        self.assertIsNone(result)
-
-        # Test with some data followed by end
-        stream = BytesIO(encode_varint(42))
-        result1 = decode_varint_from_stream(stream)
-        self.assertEqual(42, result1)
-
-        result2 = decode_varint_from_stream(stream)
-        self.assertIsNone(result2)
-
-    def test_specific_encoding_values(self):
-        """Test specific encoding patterns."""
-        # Single byte values (0-127)
-        for i in range(128):
-            encoded = encode_varint(i)
-            self.assertEqual(1, len(encoded))
-            self.assertEqual(i, encoded[0])
-
-        # Two byte values (128-16383)
-        encoded_128 = encode_varint(128)
-        self.assertEqual(b"\x80\x01", encoded_128)
-
-        encoded_255 = encode_varint(255)
-        self.assertEqual(b"\xff\x01", encoded_255)
-
-        encoded_256 = encode_varint(256)
-        self.assertEqual(b"\x80\x02", encoded_256)
-
-    def test_large_values(self):
-        """Test encoding/decoding of large values."""
-        large_values = [
-            (1 << 7) - 1,  # 127
-            (1 << 7),  # 128
-            (1 << 14) - 1,  # 16383
-            (1 << 14),  # 16384
-            (1 << 21) - 1,  # 2097151
-            (1 << 21),  # 2097152
-            (1 << 28) - 1,  # 268435455
-            (1 << 28),  # 268435456
-        ]
-
-        for value in large_values:
-            encoded = encode_varint(value)
-            decoded, _ = decode_varint(encoded, 0)
-            self.assertEqual(value, decoded, f"Failed for large value {value}")
-
-
-if __name__ == "__main__":
-    unittest.main()