lib.rs 6.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226
  1. /*
  2. * Copyright (C) 2009 Jelmer Vernooij <jelmer@jelmer.uk>
  3. *
  4. * Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
  5. * General Public License as public by the Free Software Foundation; version 2.0
  6. * or (at your option) any later version. You can redistribute it and/or
  7. * modify it under the terms of either of these two licenses.
  8. *
  9. * Unless required by applicable law or agreed to in writing, software
  10. * distributed under the License is distributed on an "AS IS" BASIS,
  11. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. * See the License for the specific language governing permissions and
  13. * limitations under the License.
  14. *
  15. * You should have received a copy of the licenses; if not, see
  16. * <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
  17. * and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
  18. * License, Version 2.0.
  19. */
  20. use pyo3::exceptions::{PyTypeError, PyValueError};
  21. use pyo3::prelude::*;
  22. use pyo3::types::{PyBytes, PyList};
  23. pyo3::import_exception!(dulwich.errors, ApplyDeltaError);
  24. fn py_is_sha(sha: &PyObject, py: Python) -> PyResult<bool> {
  25. // Check if the object is a bytes object
  26. if sha.bind(py).is_instance_of::<PyBytes>() {
  27. // Check if the bytes object has a size of 20
  28. if sha.extract::<&[u8]>(py)?.len() == 20 {
  29. Ok(true)
  30. } else {
  31. Ok(false)
  32. }
  33. } else {
  34. Ok(false)
  35. }
  36. }
  37. #[pyfunction]
  38. fn bisect_find_sha(
  39. py: Python,
  40. start: i32,
  41. end: i32,
  42. sha: Py<PyBytes>,
  43. unpack_name: PyObject,
  44. ) -> PyResult<Option<i32>> {
  45. // Convert sha_obj to a byte slice
  46. let sha = sha.as_bytes(py);
  47. let sha_len = sha.len();
  48. // Check if sha is 20 bytes long
  49. if sha_len != 20 {
  50. return Err(PyValueError::new_err("Sha is not 20 bytes long"));
  51. }
  52. // Check if start > end
  53. if start > end {
  54. return Err(PyValueError::new_err("start > end"));
  55. }
  56. // Binary search loop
  57. let mut start = start;
  58. let mut end = end;
  59. loop {
  60. if start > end {
  61. break;
  62. }
  63. let i = (start + end) / 2;
  64. let file_sha = unpack_name.call1(py, (i,))?;
  65. if !py_is_sha(&file_sha, py)? {
  66. return Err(PyTypeError::new_err("unpack_name returned non-sha object"));
  67. }
  68. match file_sha.extract::<&[u8]>(py).unwrap().cmp(sha) {
  69. std::cmp::Ordering::Less => {
  70. start = i + 1;
  71. }
  72. std::cmp::Ordering::Greater => {
  73. end = i - 1;
  74. }
  75. std::cmp::Ordering::Equal => {
  76. return Ok(Some(i));
  77. }
  78. }
  79. }
  80. Ok(None)
  81. }
  82. fn get_delta_header_size(delta: &[u8], index: &mut usize, length: usize) -> usize {
  83. let mut size: usize = 0;
  84. let mut i: usize = 0;
  85. while *index < length {
  86. let cmd = delta[*index];
  87. *index += 1;
  88. size |= ((cmd & !0x80) as usize) << i;
  89. i += 7;
  90. if cmd & 0x80 == 0 {
  91. break;
  92. }
  93. }
  94. size
  95. }
  96. fn py_chunked_as_string<'a>(
  97. py: Python<'a>,
  98. py_buf: &'a PyObject,
  99. ) -> PyResult<std::borrow::Cow<'a, [u8]>> {
  100. if let Ok(py_list) = py_buf.extract::<Bound<PyList>>(py) {
  101. let mut buf = Vec::new();
  102. for chunk in py_list.iter() {
  103. if let Ok(chunk) = chunk.extract::<&[u8]>() {
  104. buf.extend_from_slice(chunk);
  105. } else if let Ok(chunk) = chunk.extract::<Vec<u8>>() {
  106. buf.extend(chunk);
  107. } else {
  108. return Err(PyTypeError::new_err(format!(
  109. "chunk is not a byte string, but a {:?}",
  110. chunk.get_type().name()
  111. )));
  112. }
  113. }
  114. Ok(buf.into())
  115. } else if py_buf.extract::<Bound<PyBytes>>(py).is_ok() {
  116. Ok(std::borrow::Cow::Borrowed(py_buf.extract::<&[u8]>(py)?))
  117. } else {
  118. Err(PyTypeError::new_err(
  119. "buf is not a string or a list of chunks",
  120. ))
  121. }
  122. }
  123. #[pyfunction]
  124. fn apply_delta(py: Python, py_src_buf: PyObject, py_delta: PyObject) -> PyResult<Vec<PyObject>> {
  125. let src_buf = py_chunked_as_string(py, &py_src_buf)?;
  126. let delta = py_chunked_as_string(py, &py_delta)?;
  127. let src_buf_len = src_buf.len();
  128. let delta_len = delta.len();
  129. let mut index = 0;
  130. let src_size = get_delta_header_size(delta.as_ref(), &mut index, delta_len);
  131. if src_size != src_buf_len {
  132. return Err(ApplyDeltaError::new_err(format!(
  133. "Unexpected source buffer size: {} vs {}",
  134. src_size, src_buf_len
  135. )));
  136. }
  137. let dest_size = get_delta_header_size(delta.as_ref(), &mut index, delta_len);
  138. let mut out = vec![0; dest_size];
  139. let mut outindex = 0;
  140. while index < delta_len {
  141. let cmd = delta[index];
  142. index += 1;
  143. if cmd & 0x80 != 0 {
  144. let mut cp_off = 0;
  145. let mut cp_size = 0;
  146. for i in 0..4 {
  147. if cmd & (1 << i) != 0 {
  148. let x = delta[index] as usize;
  149. index += 1;
  150. cp_off |= x << (i * 8);
  151. }
  152. }
  153. for i in 0..3 {
  154. if cmd & (1 << (4 + i)) != 0 {
  155. let x = delta[index] as usize;
  156. index += 1;
  157. cp_size |= x << (i * 8);
  158. }
  159. }
  160. if cp_size == 0 {
  161. cp_size = 0x10000;
  162. }
  163. if cp_off + cp_size < cp_size || cp_off + cp_size > src_size || cp_size > dest_size {
  164. break;
  165. }
  166. out[outindex..outindex + cp_size].copy_from_slice(&src_buf[cp_off..cp_off + cp_size]);
  167. outindex += cp_size;
  168. } else if cmd != 0 {
  169. if (cmd as usize) > dest_size {
  170. break;
  171. }
  172. // Raise ApplyDeltaError if there are more bytes to copy than space
  173. if outindex + cmd as usize > dest_size {
  174. return Err(ApplyDeltaError::new_err("Not enough space to copy"));
  175. }
  176. out[outindex..outindex + cmd as usize]
  177. .copy_from_slice(&delta[index..index + cmd as usize]);
  178. outindex += cmd as usize;
  179. index += cmd as usize;
  180. } else {
  181. return Err(ApplyDeltaError::new_err("Invalid opcode 0"));
  182. }
  183. }
  184. if index != delta_len {
  185. return Err(ApplyDeltaError::new_err("delta not empty"));
  186. }
  187. if outindex != dest_size {
  188. return Err(ApplyDeltaError::new_err("dest size incorrect"));
  189. }
  190. Ok(vec![PyBytes::new(py, &out).into()])
  191. }
  192. #[pymodule]
  193. fn _pack(_py: Python, m: &Bound<PyModule>) -> PyResult<()> {
  194. m.add_function(wrap_pyfunction!(bisect_find_sha, m)?)?;
  195. m.add_function(wrap_pyfunction!(apply_delta, m)?)?;
  196. Ok(())
  197. }