lib.rs 6.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196
  1. /*
  2. * Copyright (C) 2009 Jelmer Vernooij <jelmer@jelmer.uk>
  3. *
  4. * Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
  5. * General Public License as published by the Free Software Foundation; version 2.0
  6. * or (at your option) any later version. You can redistribute it and/or
  7. * modify it under the terms of either of these two licenses.
  8. *
  9. * Unless required by applicable law or agreed to in writing, software
  10. * distributed under the License is distributed on an "AS IS" BASIS,
  11. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. * See the License for the specific language governing permissions and
  13. * limitations under the License.
  14. *
  15. * You should have received a copy of the licenses; if not, see
  16. * <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
  17. * and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
  18. * License, Version 2.0.
  19. */
  20. use memchr::memchr;
  21. use pyo3::exceptions::PyTypeError;
  22. use pyo3::import_exception;
  23. use pyo3::prelude::*;
  24. use pyo3::types::{PyBytes, PyDict};
  25. import_exception!(dulwich.errors, ObjectFormatException);
// File-type bits of an entry mode. An entry counts as a directory (subtree)
// when `mode & S_IFMT == S_IFDIR`; see `cmp_with_suffix`.
const S_IFDIR: u32 = 0o40000;
const S_IFMT: u32 = 0o170000; // File type mask
  28. #[inline]
  29. fn bytehex(byte: u8) -> u8 {
  30. match byte {
  31. 0..=9 => byte + b'0',
  32. 10..=15 => byte - 10 + b'a',
  33. _ => unreachable!(),
  34. }
  35. }
  36. fn sha_to_pyhex(py: Python, sha: &[u8]) -> PyResult<Py<PyAny>> {
  37. let mut hexsha = Vec::new();
  38. for c in sha {
  39. hexsha.push(bytehex((c & 0xF0) >> 4));
  40. hexsha.push(bytehex(c & 0x0F));
  41. }
  42. Ok(PyBytes::new(py, hexsha.as_slice()).into())
  43. }
  44. fn parse_tree_with_length(
  45. py: Python,
  46. mut text: &[u8],
  47. strict: bool,
  48. hash_len: usize,
  49. ) -> PyResult<Vec<(PyObject, u32, PyObject)>> {
  50. let mut entries = Vec::new();
  51. while !text.is_empty() {
  52. let mode_end = memchr(b' ', text)
  53. .ok_or_else(|| ObjectFormatException::new_err(("Missing terminator for mode",)))?;
  54. let text_str = String::from_utf8_lossy(&text[..mode_end]).to_string();
  55. let mode = u32::from_str_radix(text_str.as_str(), 8)
  56. .map_err(|e| ObjectFormatException::new_err((format!("invalid mode: {}", e),)))?;
  57. if strict && text[0] == b'0' {
  58. return Err(ObjectFormatException::new_err((
  59. "Illegal leading zero on mode",
  60. )));
  61. }
  62. text = &text[mode_end + 1..];
  63. let namelen = memchr(b'\0', text)
  64. .ok_or_else(|| ObjectFormatException::new_err(("Missing trailing \\0",)))?;
  65. let name = &text[..namelen];
  66. // Skip name and null terminator
  67. text = &text[namelen + 1..];
  68. // Check if we have enough bytes for the hash
  69. if text.len() < hash_len {
  70. return Err(ObjectFormatException::new_err(("SHA truncated",)));
  71. }
  72. let sha = &text[..hash_len];
  73. entries.push((
  74. PyBytes::new(py, name).into_pyobject(py)?.unbind().into(),
  75. mode,
  76. sha_to_pyhex(py, sha)?,
  77. ));
  78. text = &text[hash_len..];
  79. }
  80. Ok(entries)
  81. }
  82. #[pyfunction]
  83. #[pyo3(signature = (text, strict=None, hash_algorithm=None))]
  84. fn parse_tree(
  85. py: Python,
  86. text: &[u8],
  87. strict: Option<bool>,
  88. hash_algorithm: Option<PyObject>,
  89. ) -> PyResult<Vec<(PyObject, u32, PyObject)>> {
  90. let strict = strict.unwrap_or(false);
  91. // Determine hash length from hash_algorithm if provided
  92. if let Some(algo) = hash_algorithm {
  93. // Get oid_length attribute from hash algorithm object
  94. let oid_length: usize = algo.getattr(py, "oid_length")?.extract(py)?;
  95. parse_tree_with_length(py, text, strict, oid_length)
  96. } else {
  97. // Try to auto-detect by attempting to parse with both lengths
  98. // We'll attempt to parse with SHA1 first (20 bytes), then SHA256 (32 bytes)
  99. match parse_tree_with_length(py, text, strict, 20) {
  100. Ok(entries) => Ok(entries),
  101. Err(_) => {
  102. // SHA1 failed, try SHA256
  103. parse_tree_with_length(py, text, strict, 32)
  104. }
  105. }
  106. }
  107. }
  108. fn cmp_with_suffix(a: (u32, &[u8]), b: (u32, &[u8])) -> std::cmp::Ordering {
  109. let len = std::cmp::min(a.1.len(), b.1.len());
  110. let cmp = a.1[..len].cmp(&b.1[..len]);
  111. if cmp != std::cmp::Ordering::Equal {
  112. return cmp;
  113. }
  114. let c1 =
  115. a.1.get(len)
  116. .map_or_else(|| if (a.0 & S_IFMT) == S_IFDIR { b'/' } else { 0 }, |&c| c);
  117. let c2 =
  118. b.1.get(len)
  119. .map_or_else(|| if (b.0 & S_IFMT) == S_IFDIR { b'/' } else { 0 }, |&c| c);
  120. c1.cmp(&c2)
  121. }
  122. /// Iterate over a tree entries dictionary.
  123. ///
  124. /// # Arguments
  125. ///
  126. /// name_order: If True, iterate entries in order of their name. If
  127. /// False, iterate entries in tree order, that is, treat subtree entries as
  128. /// having '/' appended.
  129. /// entries: Dictionary mapping names to (mode, sha) tuples
  130. ///
  131. /// # Returns: Iterator over (name, mode, hexsha)
  132. #[pyfunction]
  133. fn sorted_tree_items(
  134. py: Python,
  135. entries: &Bound<PyDict>,
  136. name_order: bool,
  137. ) -> PyResult<Vec<Py<PyAny>>> {
  138. let mut qsort_entries = entries
  139. .iter()
  140. .map(|(name, value)| -> PyResult<(Vec<u8>, u32, Vec<u8>)> {
  141. let value = value
  142. .extract::<(u32, Vec<u8>)>()
  143. .map_err(|e| PyTypeError::new_err((format!("invalid type: {}", e),)))?;
  144. Ok((name.extract::<Vec<u8>>().unwrap(), value.0, value.1))
  145. })
  146. .collect::<PyResult<Vec<(Vec<u8>, u32, Vec<u8>)>>>()?;
  147. if name_order {
  148. qsort_entries.sort_by(|a, b| a.0.cmp(&b.0));
  149. } else {
  150. qsort_entries.sort_by(|a, b| cmp_with_suffix((a.1, a.0.as_slice()), (b.1, b.0.as_slice())));
  151. }
  152. let objectsm = py.import("dulwich.objects")?;
  153. let tree_entry_cls = objectsm.getattr("TreeEntry")?;
  154. qsort_entries
  155. .into_iter()
  156. .map(|(name, mode, hexsha)| -> PyResult<Py<PyAny>> {
  157. Ok(tree_entry_cls
  158. .call1((
  159. PyBytes::new(py, name.as_slice())
  160. .into_pyobject(py)?
  161. .unbind()
  162. .into_any(),
  163. mode,
  164. PyBytes::new(py, hexsha.as_slice())
  165. .into_pyobject(py)?
  166. .unbind()
  167. .into_any(),
  168. ))?
  169. .unbind()
  170. .into())
  171. })
  172. .collect::<PyResult<Vec<Py<PyAny>>>>()
  173. }
  174. #[pymodule]
  175. fn _objects(_py: Python, m: &Bound<PyModule>) -> PyResult<()> {
  176. m.add_function(wrap_pyfunction!(sorted_tree_items, m)?)?;
  177. m.add_function(wrap_pyfunction!(parse_tree, m)?)?;
  178. Ok(())
  179. }