object_format.py 4.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163
  1. # object_format.py -- Object format abstraction layer for Git
  2. # Copyright (C) 2024 The Dulwich contributors
  3. #
  4. # SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
  5. # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
  6. # General Public License as published by the Free Software Foundation; version 2.0
  7. # or (at your option) any later version. You can redistribute it and/or
  8. # modify it under the terms of either of these two licenses.
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. #
  16. # You should have received a copy of the licenses; if not, see
  17. # <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
  18. # and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
  19. # License, Version 2.0.
  20. #
  21. """Object format abstraction for Git objects.
  22. This module provides an abstraction layer for different object formats
  23. used in Git repositories (SHA-1 and SHA-256).
  24. """
  25. from collections.abc import Callable
  26. from hashlib import sha1, sha256
  27. from typing import TYPE_CHECKING
  28. if TYPE_CHECKING:
  29. from _hashlib import HASH
  30. class ObjectFormat:
  31. """Object format (hash algorithm) used in Git."""
  32. def __init__(
  33. self,
  34. name: str,
  35. type_num: int,
  36. oid_length: int,
  37. hex_length: int,
  38. hash_func: Callable[[], "HASH"],
  39. ) -> None:
  40. """Initialize an object format.
  41. Args:
  42. name: Name of the format (e.g., "sha1", "sha256")
  43. type_num: Format type number used in Git
  44. oid_length: Length of the binary object ID in bytes
  45. hex_length: Length of the hexadecimal object ID in characters
  46. hash_func: Hash function from hashlib
  47. """
  48. self.name = name
  49. self.type_num = type_num
  50. self.oid_length = oid_length
  51. self.hex_length = hex_length
  52. self.hash_func = hash_func
  53. def __str__(self) -> str:
  54. """Return string representation."""
  55. return self.name
  56. def __repr__(self) -> str:
  57. """Return repr."""
  58. return f"ObjectFormat({self.name!r})"
  59. def new_hash(self) -> "HASH":
  60. """Create a new hash object."""
  61. return self.hash_func()
  62. def hash_object(self, data: bytes) -> bytes:
  63. """Hash data and return the digest.
  64. Args:
  65. data: Data to hash
  66. Returns:
  67. Binary digest
  68. """
  69. h = self.new_hash()
  70. h.update(data)
  71. return h.digest()
  72. def hash_object_hex(self, data: bytes) -> bytes:
  73. """Hash data and return the hexadecimal digest.
  74. Args:
  75. data: Data to hash
  76. Returns:
  77. Hexadecimal digest as bytes
  78. """
  79. h = self.new_hash()
  80. h.update(data)
  81. return h.hexdigest().encode("ascii")
  82. # Define the supported object formats
  83. SHA1 = ObjectFormat("sha1", type_num=1, oid_length=20, hex_length=40, hash_func=sha1)
  84. SHA256 = ObjectFormat(
  85. "sha256", type_num=20, oid_length=32, hex_length=64, hash_func=sha256
  86. )
  87. # Map of format names to ObjectFormat instances
  88. OBJECT_FORMATS = {
  89. "sha1": SHA1,
  90. "sha256": SHA256,
  91. }
  92. # Map of format numbers to ObjectFormat instances
  93. OBJECT_FORMAT_TYPE_NUMS = {
  94. 1: SHA1,
  95. 2: SHA256,
  96. }
  97. # Default format for backward compatibility
  98. DEFAULT_OBJECT_FORMAT = SHA1
  99. def get_object_format(name: str | None = None) -> ObjectFormat:
  100. """Get an object format by name.
  101. Args:
  102. name: Format name ("sha1" or "sha256"). If None, returns default.
  103. Returns:
  104. ObjectFormat instance
  105. Raises:
  106. ValueError: If the format name is not supported
  107. """
  108. if name is None:
  109. return DEFAULT_OBJECT_FORMAT
  110. try:
  111. return OBJECT_FORMATS[name.lower()]
  112. except KeyError:
  113. raise ValueError(f"Unsupported object format: {name}")
  114. def verify_same_object_format(*formats: ObjectFormat) -> ObjectFormat:
  115. """Verify that all provided object formats are the same.
  116. Args:
  117. *formats: Object format instances to verify
  118. Returns:
  119. The common object format
  120. Raises:
  121. ValueError: If formats don't match or no formats provided
  122. """
  123. if not formats:
  124. raise ValueError("At least one object format must be provided")
  125. first = formats[0]
  126. for fmt in formats[1:]:
  127. if fmt != first:
  128. raise ValueError(f"Object format mismatch: {first.name} != {fmt.name}")
  129. return first