lib.rs 6.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197
  1. /*
  2. * Copyright (C) 2010 Google, Inc.
  3. * Copyright (C) 2024 Jelmer Vernooij <jelmer@jelmer.uk>
  4. *
  5. * Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
  6. * General Public License as published by the Free Software Foundation; version 2.0
  7. * or (at your option) any later version. You can redistribute it and/or
  8. * modify it under the terms of either of these two licenses.
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. *
  16. * You should have received a copy of the licenses; if not, see
  17. * <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
  18. * and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
  19. * License, Version 2.0.
  20. */
  21. use pyo3::prelude::*;
  22. use pyo3::exceptions::PyTypeError;
  23. use pyo3::types::{PyBytes, PyList, PyTuple};
  24. use pyo3::Python;
  25. use std::cmp::Ordering;
  /// Bit mask applied to a mode value to isolate its file-type bits.
  const S_IFMT: u32 = 0o170_000;
  /// File-type bits of a directory; `_is_tree` compares masked modes against it.
  const S_IFDIR: u32 = 0o040_000;
  28. fn add_hash(get: &Bound<PyAny>, set: &Bound<PyAny>, string: &[u8], py: Python) -> PyResult<()> {
  29. let str_obj = PyBytes::new(py, string);
  30. let hash_obj = str_obj.hash()?;
  31. let value = get.call1((hash_obj,))?;
  32. let n = string.len();
  33. set.call1((hash_obj, value.extract::<usize>()? + n))?;
  34. Ok(())
  35. }
  36. #[pyfunction]
  37. fn _count_blocks(py: Python, obj: &Bound<PyAny>) -> PyResult<Py<PyAny>> {
  38. let default_dict_cls = PyModule::import(py, "collections")?.getattr("defaultdict")?;
  39. let int_cls = PyModule::import(py, "builtins")?.getattr("int")?;
  40. let counts = default_dict_cls.call1((int_cls,))?;
  41. let get = counts.getattr("__getitem__")?;
  42. let set = counts.getattr("__setitem__")?;
  43. let chunks = obj.call_method0("as_raw_chunks")?;
  44. if !chunks.is_instance_of::<PyList>() {
  45. return Err(PyTypeError::new_err(
  46. "as_raw_chunks() did not return a list",
  47. ));
  48. }
  49. let num_chunks = chunks.extract::<Vec<Py<PyAny>>>()?.len();
  50. let pym = py.import("dulwich.diff_tree")?;
  51. let block_size = pym.getattr("_BLOCK_SIZE")?.extract::<usize>()?;
  52. let mut block: Vec<u8> = Vec::with_capacity(block_size);
  53. for i in 0..num_chunks {
  54. let chunk = chunks.get_item(i)?;
  55. if !chunk.is_instance_of::<PyBytes>() {
  56. return Err(PyTypeError::new_err("chunk is not a string"));
  57. }
  58. let chunk_str = chunk.extract::<&[u8]>()?;
  59. for c in chunk_str {
  60. block.push(*c);
  61. if *c == b'\n' || block.len() == block_size {
  62. add_hash(&get, &set, &block, py)?;
  63. block.clear();
  64. }
  65. }
  66. }
  67. if !block.is_empty() {
  68. add_hash(&get, &set, &block, py)?;
  69. }
  70. Ok(counts.into_pyobject(py).unwrap().into())
  71. }
  72. #[pyfunction]
  73. fn _is_tree(_py: Python, entry: &Bound<PyAny>) -> PyResult<bool> {
  74. if entry.is_none() {
  75. return Ok(false);
  76. }
  77. let mode = entry.getattr("mode")?;
  78. if mode.is_none() {
  79. Ok(false)
  80. } else {
  81. let lmode = mode.extract::<u32>()?;
  82. Ok((lmode & S_IFMT) == S_IFDIR)
  83. }
  84. }
  85. fn tree_entries(path: &[u8], tree: &Bound<PyAny>, py: Python) -> PyResult<Vec<Py<PyAny>>> {
  86. if tree.is_none() {
  87. return Ok(Vec::new());
  88. }
  89. let dom = py.import("dulwich.objects")?;
  90. let tree_entry_cls = dom.getattr("TreeEntry")?;
  91. let items = tree
  92. .call_method1("iteritems", (true,))?
  93. .extract::<Vec<Py<PyAny>>>()?;
  94. let mut result = Vec::new();
  95. for item in items {
  96. let (name, mode, sha) = item.extract::<(Vec<u8>, u32, Py<PyAny>)>(py)?;
  97. let mut new_path = Vec::with_capacity(path.len() + name.len() + 1);
  98. if !path.is_empty() {
  99. new_path.extend_from_slice(path);
  100. new_path.push(b'/');
  101. }
  102. new_path.extend_from_slice(name.as_slice());
  103. let tree_entry = tree_entry_cls.call1((PyBytes::new(py, &new_path), mode, sha))?;
  104. result.push(tree_entry.into_pyobject(py).unwrap().into());
  105. }
  106. Ok(result)
  107. }
  108. fn entry_path_cmp(entry1: &Bound<PyAny>, entry2: &Bound<PyAny>) -> PyResult<Ordering> {
  109. let path1_o = entry1.getattr("path")?;
  110. let path1 = path1_o.extract::<&[u8]>()?;
  111. let path2_o = entry2.getattr("path")?;
  112. let path2 = path2_o.extract::<&[u8]>()?;
  113. Ok(path1.cmp(path2))
  114. }
  115. #[pyfunction]
  116. fn _merge_entries(
  117. py: Python,
  118. path: &[u8],
  119. tree1: &Bound<PyAny>,
  120. tree2: &Bound<PyAny>,
  121. ) -> PyResult<Py<PyAny>> {
  122. let entries1 = tree_entries(path, tree1, py)?;
  123. let entries2 = tree_entries(path, tree2, py)?;
  124. let mut result = Vec::new();
  125. let mut i1 = 0;
  126. let mut i2 = 0;
  127. while i1 < entries1.len() && i2 < entries2.len() {
  128. let cmp = entry_path_cmp(entries1[i1].bind(py), entries2[i2].bind(py))?;
  129. let (e1, e2) = match cmp {
  130. Ordering::Equal => (entries1[i1].clone_ref(py), entries2[i2].clone_ref(py)),
  131. Ordering::Less => (entries1[i1].clone_ref(py), py.None()),
  132. Ordering::Greater => (py.None(), entries2[i2].clone_ref(py)),
  133. };
  134. let pair = PyTuple::new(py, &[e1, e2]).unwrap();
  135. result.push(pair);
  136. match cmp {
  137. Ordering::Equal => {
  138. i1 += 1;
  139. i2 += 1;
  140. }
  141. Ordering::Less => {
  142. i1 += 1;
  143. }
  144. Ordering::Greater => {
  145. i2 += 1;
  146. }
  147. }
  148. }
  149. while i1 < entries1.len() {
  150. let pair = PyTuple::new(py, &[entries1[i1].clone_ref(py), py.None()]).unwrap();
  151. result.push(pair);
  152. i1 += 1;
  153. }
  154. while i2 < entries2.len() {
  155. let pair = PyTuple::new(py, &[py.None(), entries2[i2].clone_ref(py)]).unwrap();
  156. result.push(pair);
  157. i2 += 1;
  158. }
  159. Ok(PyList::new(py, &result).unwrap().unbind().into())
  160. }
  161. #[pymodule]
  162. fn _diff_tree(_py: Python, m: &Bound<PyModule>) -> PyResult<()> {
  163. m.add_function(wrap_pyfunction!(_count_blocks, m)?)?;
  164. m.add_function(wrap_pyfunction!(_is_tree, m)?)?;
  165. m.add_function(wrap_pyfunction!(_merge_entries, m)?)?;
  166. Ok(())
  167. }