lib.rs 6.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191
  1. /*
  2. * Copyright (C) 2010 Google, Inc.
  3. * Copyright (C) 2024 Jelmer Vernooij <jelmer@jelmer.uk>
  4. *
  5. * Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
  6. * General Public License as published by the Free Software Foundation; version 2.0
  7. * or (at your option) any later version. You can redistribute it and/or
  8. * modify it under the terms of either of these two licenses.
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. *
  16. * You should have received a copy of the licenses; if not, see
  17. * <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
  18. * and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
  19. * License, Version 2.0.
  20. */
  21. use pyo3::prelude::*;
  22. use pyo3::exceptions::PyTypeError;
  23. use pyo3::types::{PyBytes, PyList, PyTuple};
  24. use pyo3::Python;
  25. use std::cmp::Ordering;
  // Mask applied to a mode before comparing it against a file-type constant.
  const S_IFMT: u32 = 0o170_000;
  // File-type bits that `_is_tree` treats as identifying a tree (directory).
  const S_IFDIR: u32 = 0o040_000;
  28. fn add_hash(get: &Bound<PyAny>, set: &Bound<PyAny>, string: &[u8], py: Python) -> PyResult<()> {
  29. let str_obj = PyBytes::new_bound(py, string);
  30. let hash_obj = str_obj.hash()?;
  31. let value = get.call1((hash_obj,))?;
  32. let n = string.len();
  33. set.call1((hash_obj, value.extract::<usize>()? + n))?;
  34. Ok(())
  35. }
  36. #[pyfunction]
  37. fn _count_blocks(py: Python, obj: &Bound<PyAny>) -> PyResult<PyObject> {
  38. let default_dict_cls = PyModule::import_bound(py, "collections")?.getattr("defaultdict")?;
  39. let int_cls = PyModule::import_bound(py, "builtins")?.getattr("int")?;
  40. let counts = default_dict_cls.call1((int_cls,))?;
  41. let get = counts.getattr("__getitem__")?;
  42. let set = counts.getattr("__setitem__")?;
  43. let chunks = obj.call_method0("as_raw_chunks")?;
  44. if !chunks.is_instance_of::<PyList>() {
  45. return Err(PyTypeError::new_err(
  46. "as_raw_chunks() did not return a list",
  47. ));
  48. }
  49. let num_chunks = chunks.extract::<Vec<PyObject>>()?.len();
  50. let pym = py.import_bound("dulwich.diff_tree")?;
  51. let block_size = pym.getattr("_BLOCK_SIZE")?.extract::<usize>()?;
  52. let mut block: Vec<u8> = Vec::with_capacity(block_size);
  53. for i in 0..num_chunks {
  54. let chunk = chunks.get_item(i)?;
  55. if !chunk.is_instance_of::<PyBytes>() {
  56. return Err(PyTypeError::new_err("chunk is not a string"));
  57. }
  58. let chunk_str = chunk.extract::<&[u8]>()?;
  59. for c in chunk_str {
  60. block.push(*c);
  61. if *c == b'\n' || block.len() == block_size {
  62. add_hash(&get, &set, &block, py)?;
  63. block.clear();
  64. }
  65. }
  66. }
  67. if !block.is_empty() {
  68. add_hash(&get, &set, &block, py)?;
  69. }
  70. Ok(counts.to_object(py))
  71. }
  72. #[pyfunction]
  73. fn _is_tree(_py: Python, entry: &Bound<PyAny>) -> PyResult<bool> {
  74. let mode = entry.getattr("mode")?;
  75. if mode.is_none() {
  76. Ok(false)
  77. } else {
  78. let lmode = mode.extract::<u32>()?;
  79. Ok((lmode & S_IFMT) == S_IFDIR)
  80. }
  81. }
  82. fn tree_entries(path: &[u8], tree: &Bound<PyAny>, py: Python) -> PyResult<Vec<PyObject>> {
  83. if tree.is_none() {
  84. return Ok(Vec::new());
  85. }
  86. let dom = py.import_bound("dulwich.objects")?;
  87. let tree_entry_cls = dom.getattr("TreeEntry")?;
  88. let items = tree
  89. .call_method1("iteritems", (true,))?
  90. .extract::<Vec<PyObject>>()?;
  91. let mut result = Vec::new();
  92. for item in items {
  93. let (name, mode, sha) = item.extract::<(Vec<u8>, u32, PyObject)>(py)?;
  94. let mut new_path = Vec::with_capacity(path.len() + name.len() + 1);
  95. if !path.is_empty() {
  96. new_path.extend_from_slice(path);
  97. new_path.push(b'/');
  98. }
  99. new_path.extend_from_slice(name.as_slice());
  100. let tree_entry = tree_entry_cls.call1((PyBytes::new_bound(py, &new_path), mode, sha))?;
  101. result.push(tree_entry.to_object(py));
  102. }
  103. Ok(result)
  104. }
  105. fn entry_path_cmp(entry1: &Bound<PyAny>, entry2: &Bound<PyAny>) -> PyResult<Ordering> {
  106. let path1_o = entry1.getattr("path")?;
  107. let path1 = path1_o.extract::<&[u8]>()?;
  108. let path2_o = entry2.getattr("path")?;
  109. let path2 = path2_o.extract::<&[u8]>()?;
  110. Ok(path1.cmp(path2))
  111. }
  112. #[pyfunction]
  113. fn _merge_entries(py: Python, path: &[u8], tree1: &Bound<PyAny>, tree2: &Bound<PyAny>) -> PyResult<PyObject> {
  114. let entries1 = tree_entries(path, tree1, py)?;
  115. let entries2 = tree_entries(path, tree2, py)?;
  116. let pym = py.import_bound("dulwich.diff_tree")?;
  117. let null_entry = pym.getattr("_NULL_ENTRY")?.to_object(py);
  118. let mut result = Vec::new();
  119. let mut i1 = 0;
  120. let mut i2 = 0;
  121. while i1 < entries1.len() && i2 < entries2.len() {
  122. let cmp = entry_path_cmp(entries1[i1].bind(py), entries2[i2].bind(py))?;
  123. let (e1, e2) = match cmp {
  124. Ordering::Equal => (entries1[i1].clone_ref(py), entries2[i2].clone_ref(py)),
  125. Ordering::Less => (entries1[i1].clone_ref(py), null_entry.clone_ref(py)),
  126. Ordering::Greater => (null_entry.clone_ref(py), entries2[i2].clone_ref(py)),
  127. };
  128. let pair = PyTuple::new_bound(py, &[e1, e2]);
  129. result.push(pair);
  130. match cmp {
  131. Ordering::Equal => {
  132. i1 += 1;
  133. i2 += 1;
  134. }
  135. Ordering::Less => {
  136. i1 += 1;
  137. }
  138. Ordering::Greater => {
  139. i2 += 1;
  140. }
  141. }
  142. }
  143. while i1 < entries1.len() {
  144. let pair = PyTuple::new_bound(py, &[entries1[i1].clone_ref(py), null_entry.clone_ref(py)]);
  145. result.push(pair);
  146. i1 += 1;
  147. }
  148. while i2 < entries2.len() {
  149. let pair = PyTuple::new_bound(py, &[null_entry.clone_ref(py), entries2[i2].clone_ref(py)]);
  150. result.push(pair);
  151. i2 += 1;
  152. }
  153. Ok(PyList::new_bound(py, &result).to_object(py))
  154. }
  155. #[pymodule]
  156. fn _diff_tree(_py: Python, m: &Bound<PyModule>) -> PyResult<()> {
  157. m.add_function(wrap_pyfunction!(_count_blocks, m)?)?;
  158. m.add_function(wrap_pyfunction!(_is_tree, m)?)?;
  159. m.add_function(wrap_pyfunction!(_merge_entries, m)?)?;
  160. Ok(())
  161. }