mailmap.py 5.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173
  1. # mailmap.py -- Mailmap reader
  2. # Copyright (C) 2018 Jelmer Vernooij <jelmer@jelmer.uk>
  3. #
  4. # SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
  5. # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
  6. # General Public License as published by the Free Software Foundation; version 2.0
  7. # or (at your option) any later version. You can redistribute it and/or
  8. # modify it under the terms of either of these two licenses.
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. #
  16. # You should have received a copy of the licenses; if not, see
  17. # <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
  18. # and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
  19. # License, Version 2.0.
  20. #
  21. """Mailmap file reader."""
# Public names of this module; this list is what a star-import exposes.
__all__ = [
    "Mailmap",
    "parse_identity",
    "read_mailmap",
]
  27. from collections.abc import Iterator
  28. from typing import IO
  29. def parse_identity(text: bytes) -> tuple[bytes | None, bytes | None]:
  30. """Parse an identity string into name and email.
  31. Args:
  32. text: Identity string in format "Name <email>"
  33. Returns:
  34. Tuple of (name, email) where either can be None
  35. """
  36. # TODO(jelmer): Integrate this with dulwich.fastexport.split_email and
  37. # dulwich.repo.check_user_identity
  38. (name_str, email_str) = text.rsplit(b"<", 1)
  39. name_str = name_str.strip()
  40. email_str = email_str.rstrip(b">").strip()
  41. name: bytes | None = name_str if name_str else None
  42. email: bytes | None = email_str if email_str else None
  43. return (name, email)
  44. def read_mailmap(
  45. f: IO[bytes],
  46. ) -> Iterator[
  47. tuple[
  48. tuple[bytes | None, bytes | None],
  49. tuple[bytes | None, bytes | None] | None,
  50. ]
  51. ]:
  52. """Read a mailmap.
  53. Args:
  54. f: File-like object to read from
  55. Returns: Iterator over
  56. ((canonical_name, canonical_email), (from_name, from_email)) tuples
  57. """
  58. for line in f:
  59. # Remove comments
  60. line = line.split(b"#")[0]
  61. line = line.strip()
  62. if not line:
  63. continue
  64. (canonical_identity, from_identity) = line.split(b">", 1)
  65. canonical_identity += b">"
  66. if from_identity.strip():
  67. parsed_from_identity = parse_identity(from_identity)
  68. else:
  69. parsed_from_identity = None
  70. parsed_canonical_identity = parse_identity(canonical_identity)
  71. yield parsed_canonical_identity, parsed_from_identity
  72. class Mailmap:
  73. """Class for accessing a mailmap file."""
  74. def __init__(
  75. self,
  76. map: Iterator[
  77. tuple[
  78. tuple[bytes | None, bytes | None],
  79. tuple[bytes | None, bytes | None] | None,
  80. ]
  81. ]
  82. | None = None,
  83. ) -> None:
  84. """Initialize Mailmap.
  85. Args:
  86. map: Optional iterator of (canonical_identity, from_identity) tuples
  87. """
  88. self._table: dict[
  89. tuple[bytes | None, bytes | None],
  90. tuple[bytes | None, bytes | None],
  91. ] = {}
  92. if map:
  93. for canonical_identity, from_identity in map:
  94. self.add_entry(canonical_identity, from_identity)
  95. def add_entry(
  96. self,
  97. canonical_identity: tuple[bytes | None, bytes | None],
  98. from_identity: tuple[bytes | None, bytes | None] | None = None,
  99. ) -> None:
  100. """Add an entry to the mail mail.
  101. Any of the fields can be None, but at least one of them needs to be
  102. set.
  103. Args:
  104. canonical_identity: The canonical identity (tuple)
  105. from_identity: The from identity (tuple)
  106. """
  107. if from_identity is None:
  108. from_name, from_email = None, None
  109. else:
  110. (from_name, from_email) = from_identity
  111. (canonical_name, canonical_email) = canonical_identity
  112. if from_name is None and from_email is None:
  113. self._table[canonical_name, None] = canonical_identity
  114. self._table[None, canonical_email] = canonical_identity
  115. else:
  116. self._table[from_name, from_email] = canonical_identity
  117. def lookup(
  118. self, identity: bytes | tuple[bytes | None, bytes | None]
  119. ) -> bytes | tuple[bytes | None, bytes | None]:
  120. """Lookup an identity in this mailmail."""
  121. if not isinstance(identity, tuple):
  122. was_tuple = False
  123. identity = parse_identity(identity)
  124. else:
  125. was_tuple = True
  126. for query in [identity, (None, identity[1]), (identity[0], None)]:
  127. canonical_identity = self._table.get(query)
  128. if canonical_identity is not None:
  129. identity = (
  130. canonical_identity[0] or identity[0],
  131. canonical_identity[1] or identity[1],
  132. )
  133. break
  134. if was_tuple:
  135. return identity
  136. else:
  137. name, email = identity
  138. if name is None:
  139. name = b""
  140. if email is None:
  141. email = b""
  142. return name + b" <" + email + b">"
  143. @classmethod
  144. def from_path(cls, path: str) -> "Mailmap":
  145. """Create Mailmap from file path.
  146. Args:
  147. path: Path to mailmap file
  148. Returns:
  149. Mailmap instance
  150. """
  151. with open(path, "rb") as f:
  152. return cls(read_mailmap(f))