lib.rs 6.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214
  1. /*
  2. * Copyright (C) 2009 Jelmer Vernooij <jelmer@jelmer.uk>
  3. *
  4. * Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
  5. * General Public License as published by the Free Software Foundation; version 2.0
  6. * or (at your option) any later version. You can redistribute it and/or
  7. * modify it under the terms of either of these two licenses.
  8. *
  9. * Unless required by applicable law or agreed to in writing, software
  10. * distributed under the License is distributed on an "AS IS" BASIS,
  11. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. * See the License for the specific language governing permissions and
  13. * limitations under the License.
  14. *
  15. * You should have received a copy of the licenses; if not, see
  16. * <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
  17. * and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
  18. * License, Version 2.0.
  19. */
  20. use pyo3::prelude::*;
  21. use pyo3::types::{PyList,PyBytes};
  22. use pyo3::exceptions::{PyValueError, PyTypeError};
  23. pyo3::import_exception!(dulwich.errors, ApplyDeltaError);
  24. fn py_is_sha(sha: &PyObject, py: Python) -> PyResult<bool> {
  25. // Check if the object is a bytes object
  26. if sha.bind(py).is_instance_of::<PyBytes>() {
  27. // Check if the bytes object has a size of 20
  28. if sha.extract::<&[u8]>(py)?.len() == 20 {
  29. Ok(true)
  30. } else {
  31. Ok(false)
  32. }
  33. } else {
  34. Ok(false)
  35. }
  36. }
  37. #[pyfunction]
  38. fn bisect_find_sha(py: Python, start: i32, end: i32, sha: Py<PyBytes>, unpack_name: PyObject) -> PyResult<Option<i32>> {
  39. // Convert sha_obj to a byte slice
  40. let sha = sha.as_bytes(py);
  41. let sha_len = sha.len();
  42. // Check if sha is 20 bytes long
  43. if sha_len != 20 {
  44. return Err(PyValueError::new_err("Sha is not 20 bytes long"));
  45. }
  46. // Check if start > end
  47. if start > end {
  48. return Err(PyValueError::new_err("start > end"));
  49. }
  50. // Binary search loop
  51. let mut start = start;
  52. let mut end = end;
  53. loop {
  54. if start > end {
  55. break;
  56. }
  57. let i = (start + end) / 2;
  58. let file_sha = unpack_name.call1(py, (i,))?;
  59. if !py_is_sha(&file_sha, py)? {
  60. return Err(PyTypeError::new_err("unpack_name returned non-sha object"));
  61. }
  62. match file_sha.extract::<&[u8]>(py).unwrap().cmp(sha) {
  63. std::cmp::Ordering::Less => {
  64. start = i + 1;
  65. }
  66. std::cmp::Ordering::Greater => {
  67. end = i - 1;
  68. }
  69. std::cmp::Ordering::Equal => {
  70. return Ok(Some(i));
  71. }
  72. }
  73. }
  74. Ok(None)
  75. }
  /// Decodes one variable-length size from a delta header.
  ///
  /// Starting at `*index`, each byte contributes its low 7 bits (least
  /// significant group first); a set high bit means another byte follows.
  /// `*index` is advanced past every byte consumed.
  ///
  /// Reading stops at `length` or at the end of `delta`, whichever comes first,
  /// so a truncated header yields the bits decoded so far instead of panicking.
  /// Groups that would be shifted past the width of `usize` are ignored rather
  /// than triggering a shift overflow; callers validate the resulting size
  /// against the real buffers.
  fn get_delta_header_size(delta: &[u8], index: &mut usize, length: usize) -> usize {
      // Never trust `length` beyond what the slice actually holds.
      let end = length.min(delta.len());
      let mut size: usize = 0;
      let mut shift: u32 = 0;
      while *index < end {
          let byte = delta[*index];
          *index += 1;
          // `checked_shl` returns `None` once the shift reaches the bit width of
          // `usize`; those groups cannot be represented and contribute nothing.
          size |= usize::from(byte & 0x7f).checked_shl(shift).unwrap_or(0);
          shift = shift.saturating_add(7);
          if byte & 0x80 == 0 {
              break;
          }
      }
      size
  }
  90. fn py_chunked_as_string<'a>(py: Python<'a>, py_buf: &'a PyObject) -> PyResult<std::borrow::Cow<'a, [u8]>> {
  91. if let Ok(py_list) = py_buf.extract::<Bound<PyList>>(py) {
  92. let mut buf = Vec::new();
  93. for chunk in py_list.iter() {
  94. if let Ok(chunk) = chunk.extract::<&[u8]>() {
  95. buf.extend_from_slice(chunk);
  96. } else if let Ok(chunk) = chunk.extract::<Vec<u8>>() {
  97. buf.extend(chunk);
  98. } else {
  99. return Err(PyTypeError::new_err(format!("chunk is not a byte string, but a {:?}", chunk.get_type().name())));
  100. }
  101. }
  102. Ok(buf.into())
  103. } else if py_buf.extract::<Bound<PyBytes>>(py).is_ok() {
  104. Ok(std::borrow::Cow::Borrowed(py_buf.extract::<&[u8]>(py)?))
  105. } else {
  106. Err(PyTypeError::new_err("buf is not a string or a list of chunks"))
  107. }
  108. }
  109. #[pyfunction]
  110. fn apply_delta(py: Python, py_src_buf: PyObject, py_delta: PyObject) -> PyResult<Vec<PyObject>> {
  111. let src_buf = py_chunked_as_string(py, &py_src_buf)?;
  112. let delta = py_chunked_as_string(py, &py_delta)?;
  113. let src_buf_len = src_buf.len();
  114. let delta_len = delta.len();
  115. let mut index = 0;
  116. let src_size = get_delta_header_size(delta.as_ref(), &mut index, delta_len);
  117. if src_size != src_buf_len {
  118. return Err(ApplyDeltaError::new_err(format!(
  119. "Unexpected source buffer size: {} vs {}",
  120. src_size, src_buf_len
  121. )));
  122. }
  123. let dest_size = get_delta_header_size(delta.as_ref(), &mut index, delta_len);
  124. let mut out = vec![0; dest_size];
  125. let mut outindex = 0;
  126. while index < delta_len {
  127. let cmd = delta[index];
  128. index += 1;
  129. if cmd & 0x80 != 0 {
  130. let mut cp_off = 0;
  131. let mut cp_size = 0;
  132. for i in 0..4 {
  133. if cmd & (1 << i) != 0 {
  134. let x = delta[index] as usize;
  135. index += 1;
  136. cp_off |= x << (i * 8);
  137. }
  138. }
  139. for i in 0..3 {
  140. if cmd & (1 << (4 + i)) != 0 {
  141. let x = delta[index] as usize;
  142. index += 1;
  143. cp_size |= x << (i * 8);
  144. }
  145. }
  146. if cp_size == 0 {
  147. cp_size = 0x10000;
  148. }
  149. if cp_off + cp_size < cp_size
  150. || cp_off + cp_size > src_size
  151. || cp_size > dest_size
  152. {
  153. break;
  154. }
  155. out[outindex..outindex + cp_size].copy_from_slice(&src_buf[cp_off..cp_off + cp_size]);
  156. outindex += cp_size;
  157. } else if cmd != 0 {
  158. if (cmd as usize) > dest_size {
  159. break;
  160. }
  161. // Raise ApplyDeltaError if there are more bytes to copy than space
  162. if outindex + cmd as usize > dest_size {
  163. return Err(ApplyDeltaError::new_err("Not enough space to copy"));
  164. }
  165. out[outindex..outindex + cmd as usize].copy_from_slice(&delta[index..index + cmd as usize]);
  166. outindex += cmd as usize;
  167. index += cmd as usize;
  168. } else {
  169. return Err(ApplyDeltaError::new_err("Invalid opcode 0"));
  170. }
  171. }
  172. if index != delta_len {
  173. return Err(ApplyDeltaError::new_err("delta not empty"));
  174. }
  175. if outindex != dest_size {
  176. return Err(ApplyDeltaError::new_err("dest size incorrect"));
  177. }
  178. Ok(vec![PyBytes::new_bound(py, &out).into()])
  179. }
  180. #[pymodule]
  181. fn _pack(_py: Python, m: &Bound<PyModule>) -> PyResult<()> {
  182. m.add_function(wrap_pyfunction!(bisect_find_sha, m)?)?;
  183. m.add_function(wrap_pyfunction!(apply_delta, m)?)?;
  184. Ok(())
  185. }