Quellcode durchsuchen

Convert pack module to rust

Jelmer Vernooij vor 1 Jahr
Ursprung
Commit
e2521542d9
5 geänderte Dateien mit 217 neuen und 287 gelöschten Zeilen
  1. 0 1
      crates/pack/Cargo.toml
  2. 214 0
      crates/pack/src/lib.rs
  3. 0 282
      dulwich/_pack.c
  4. 2 3
      dulwich/porcelain.py
  5. 1 1
      setup.py

+ 0 - 1
crates/pack/Cargo.toml

@@ -5,7 +5,6 @@ edition = "2021"
 
 [lib]
 crate-type = ["cdylib"]
-path = "dulwich/_pack.rs"
 
 [dependencies]
 pyo3 = { version = ">=0.19", features = ["extension-module"]}

+ 214 - 0
crates/pack/src/lib.rs

@@ -0,0 +1,214 @@
+/*
+ * Copyright (C) 2009 Jelmer Vernooij <jelmer@jelmer.uk>
+ *
+ * Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
+ * General Public License as published by the Free Software Foundation; version 2.0
+ * or (at your option) any later version. You can redistribute it and/or
+ * modify it under the terms of either of these two licenses.
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * You should have received a copy of the licenses; if not, see
+ * <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
+ * and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
+ * License, Version 2.0.
+ */
+
+use pyo3::prelude::*;
+use pyo3::types::{PyList,PyBytes};
+use pyo3::exceptions::{PyValueError, PyTypeError};
+
+pyo3::import_exception!(dulwich.errors, ApplyDeltaError);
+
+/// Report whether `sha` is a Python `bytes` object holding exactly 20 bytes.
+///
+/// Returns `Ok(false)` for any object that is not a `bytes` instance or whose
+/// length differs from 20; extraction failures are propagated as errors.
+fn py_is_sha(sha: &PyObject, py: Python) -> PyResult<bool> {
+    // Guard clause: anything that is not a bytes instance is rejected outright.
+    if !sha.as_ref(py).is_instance_of::<PyBytes>() {
+        return Ok(false);
+    }
+
+    // Only a payload of exactly 20 bytes qualifies.
+    let raw = sha.extract::<&[u8]>(py)?;
+    Ok(raw.len() == 20)
+}
+
+/// Binary-search the inclusive index range `start..=end` for `sha`.
+///
+/// `unpack_name` is called with a candidate index and must return a 20-byte
+/// `bytes` object. The returned values are compared bytewise against `sha`,
+/// so the search is only meaningful when they are in ascending order.
+///
+/// Returns the matching index, or `None` when no entry equals `sha`.
+///
+/// Raises `ValueError` if `sha` is not 20 bytes long or if `start > end`, and
+/// `TypeError` if `unpack_name` returns anything other than a 20-byte `bytes`
+/// object. Exceptions raised by `unpack_name` itself are propagated.
+#[pyfunction]
+fn bisect_find_sha(py: Python, start: i32, end: i32, sha: Py<PyBytes>, unpack_name: PyObject) -> PyResult<Option<i32>> {
+    let wanted = sha.as_bytes(py);
+    if wanted.len() != 20 {
+        return Err(PyValueError::new_err("Sha is not 20 bytes long"));
+    }
+
+    if start > end {
+        return Err(PyValueError::new_err("start > end"));
+    }
+
+    // Track the bounds as i64 so that neither the midpoint sum nor the
+    // `mid +/- 1` updates can overflow, whatever i32 values the caller passes.
+    let mut low = i64::from(start);
+    let mut high = i64::from(end);
+    while low <= high {
+        let mid = (low + high) / 2;
+        // `mid` always lies between the original `start` and `end`, so the
+        // narrowing cast cannot truncate.
+        let i = mid as i32;
+
+        let file_sha = unpack_name.call1(py, (i,))?;
+        if !py_is_sha(&file_sha, py)? {
+            return Err(PyTypeError::new_err("unpack_name returned non-sha object"));
+        }
+
+        match file_sha.extract::<&[u8]>(py)?.cmp(wanted) {
+            std::cmp::Ordering::Less => low = mid + 1,
+            std::cmp::Ordering::Greater => high = mid - 1,
+            std::cmp::Ordering::Equal => return Ok(Some(i)),
+        }
+    }
+
+    Ok(None)
+}
+
+/// Decode one variable-length size field from a delta buffer.
+///
+/// Starting at `*index`, each byte supplies its low seven bits, least
+/// significant group first; a byte with the high bit (0x80) clear ends the
+/// field. `*index` is left just past the last byte consumed. Decoding also
+/// stops, returning whatever has been accumulated so far, once `*index`
+/// reaches `length` or the end of `delta`.
+fn get_delta_header_size(delta: &[u8], index: &mut usize, length: usize) -> usize {
+    // Never read past the slice, even if the caller passes a larger `length`.
+    let limit = length.min(delta.len());
+    let mut size: usize = 0;
+    let mut shift: u32 = 0;
+    while *index < limit {
+        let byte = delta[*index];
+        *index += 1;
+        // A malformed header can carry more continuation bytes than fit in a
+        // `usize`. A plain `<<` would overflow the shift amount (a panic in
+        // debug builds); groups shifted entirely out of range contribute 0.
+        size |= usize::from(byte & 0x7f).checked_shl(shift).unwrap_or(0);
+        shift = shift.saturating_add(7);
+        if byte & 0x80 == 0 {
+            break;
+        }
+    }
+    size
+}
+
+/// Return the contents of `py_buf` as one contiguous byte slice.
+///
+/// `py_buf` may be a `bytes` object, which is borrowed without copying, or a
+/// `list` of chunks, which are concatenated into a newly allocated buffer.
+/// Fails with `TypeError` for any other object, or when a list element cannot
+/// be extracted as bytes.
+fn py_chunked_as_string<'a>(py: Python<'a>, py_buf: &'a PyObject) -> PyResult<std::borrow::Cow<'a, [u8]>> {
+    use std::borrow::Cow;
+
+    if let Ok(py_list) = py_buf.extract::<&PyList>(py) {
+        let mut buf = Vec::new();
+        for chunk in py_list.iter() {
+            if let Ok(bytes) = chunk.extract::<&[u8]>() {
+                buf.extend_from_slice(bytes);
+            } else if let Ok(bytes) = chunk.extract::<Vec<u8>>() {
+                // Fallback for chunks that cannot be borrowed as a byte slice
+                // but still convert to a byte vector.
+                buf.extend(bytes);
+            } else {
+                // `name()` is itself fallible; unwrap it so the message shows
+                // the bare type name rather than a debug-formatted `Result`.
+                let type_name = match chunk.get_type().name() {
+                    Ok(name) => name.to_string(),
+                    Err(_) => String::from("<unknown>"),
+                };
+                return Err(PyTypeError::new_err(format!("chunk is not a byte string, but a {}", type_name)));
+            }
+        }
+        Ok(Cow::Owned(buf))
+    } else if let Ok(bytes) = py_buf.extract::<&PyBytes>(py) {
+        // Borrow the bytes object's storage directly; no copy is made.
+        Ok(Cow::Borrowed(bytes.as_bytes()))
+    } else {
+        Err(PyTypeError::new_err("buf is not a string or a list of chunks"))
+    }
+}
+
+/// Read the delta byte at `*index` as a `usize` and advance `*index` past it.
+///
+/// Fails with `ApplyDeltaError` when `*index` is past the end of `delta`.
+fn next_delta_byte(delta: &[u8], index: &mut usize) -> PyResult<usize> {
+    let byte = *delta
+        .get(*index)
+        .ok_or_else(|| ApplyDeltaError::new_err("delta is truncated"))?;
+    *index += 1;
+    Ok(usize::from(byte))
+}
+
+/// Apply the delta `py_delta` to the source `py_src_buf`.
+///
+/// Both arguments may be a `bytes` object or a `list` of byte chunks. The
+/// delta starts with two variable-length sizes (expected source size and
+/// result size) followed by opcodes: a byte with the high bit set copies a
+/// range from the source, any other non-zero byte inserts that many literal
+/// bytes taken from the delta, and a zero byte is invalid.
+///
+/// Returns a one-element list containing the reconstructed `bytes`.
+///
+/// Raises `ApplyDeltaError` when the source size does not match, an opcode is
+/// invalid, the delta is truncated or has unconsumed data, or the produced
+/// data does not match the declared result size. Raises `TypeError` when an
+/// argument is neither `bytes` nor a list of chunks.
+#[pyfunction]
+fn apply_delta(py: Python, py_src_buf: PyObject, py_delta: PyObject) -> PyResult<Vec<PyObject>> {
+    let src_chunks = py_chunked_as_string(py, &py_src_buf)?;
+    let delta_chunks = py_chunked_as_string(py, &py_delta)?;
+    let src_buf: &[u8] = src_chunks.as_ref();
+    let delta: &[u8] = delta_chunks.as_ref();
+
+    let src_buf_len = src_buf.len();
+    let delta_len = delta.len();
+    let mut index = 0;
+
+    let src_size = get_delta_header_size(delta, &mut index, delta_len);
+    if src_size != src_buf_len {
+        return Err(ApplyDeltaError::new_err(format!(
+            "Unexpected source buffer size: {} vs {}",
+            src_size, src_buf_len
+        )));
+    }
+
+    let dest_size = get_delta_header_size(delta, &mut index, delta_len);
+    let mut out = vec![0; dest_size];
+    // Invariant: `outindex <= dest_size` for the whole loop.
+    let mut outindex = 0;
+
+    while index < delta_len {
+        let cmd = delta[index];
+        index += 1;
+
+        if cmd & 0x80 != 0 {
+            // Copy-from-source opcode: bits 0-3 select which offset bytes
+            // follow, bits 4-6 select which size bytes follow.
+            let mut cp_off: usize = 0;
+            let mut cp_size: usize = 0;
+
+            for i in 0..4 {
+                if cmd & (1 << i) != 0 {
+                    cp_off |= next_delta_byte(delta, &mut index)? << (i * 8);
+                }
+            }
+
+            for i in 0..3 {
+                if cmd & (1 << (4 + i)) != 0 {
+                    cp_size |= next_delta_byte(delta, &mut index)? << (i * 8);
+                }
+            }
+
+            // An encoded size of zero stands for 0x10000 bytes.
+            if cp_size == 0 {
+                cp_size = 0x10000;
+            }
+
+            // A copy outside the source, or larger than the whole result,
+            // stops processing; the checks after the loop report the error.
+            let src_end = match cp_off.checked_add(cp_size) {
+                Some(end) if end <= src_size && cp_size <= dest_size => end,
+                _ => break,
+            };
+
+            // The copy must also fit in what is left of the output buffer.
+            if cp_size > dest_size - outindex {
+                return Err(ApplyDeltaError::new_err("Not enough space to copy"));
+            }
+
+            out[outindex..outindex + cp_size].copy_from_slice(&src_buf[cp_off..src_end]);
+            outindex += cp_size;
+        } else if cmd != 0 {
+            // Literal-insert opcode: the next `cmd` bytes of the delta are
+            // appended to the output.
+            let len = usize::from(cmd);
+            if len > dest_size {
+                break;
+            }
+
+            // Raise ApplyDeltaError if there are more bytes to copy than space
+            if len > dest_size - outindex {
+                return Err(ApplyDeltaError::new_err("Not enough space to copy"));
+            }
+
+            let literal = delta
+                .get(index..index + len)
+                .ok_or_else(|| ApplyDeltaError::new_err("delta is truncated"))?;
+            out[outindex..outindex + len].copy_from_slice(literal);
+            outindex += len;
+            index += len;
+        } else {
+            return Err(ApplyDeltaError::new_err("Invalid opcode 0"));
+        }
+    }
+
+    if index != delta_len {
+        return Err(ApplyDeltaError::new_err("delta not empty"));
+    }
+
+    if outindex != dest_size {
+        return Err(ApplyDeltaError::new_err("dest size incorrect"));
+    }
+
+    Ok(vec![PyBytes::new(py, &out).into()])
+}
+
+/// Module initialiser for `_pack`: registers `bisect_find_sha` and
+/// `apply_delta` on the module object.
+#[pymodule]
+fn _pack(_py: Python, m: &PyModule) -> PyResult<()> {
+    let bisect = wrap_pyfunction!(bisect_find_sha, m)?;
+    m.add_function(bisect)?;
+
+    let delta = wrap_pyfunction!(apply_delta, m)?;
+    m.add_function(delta)?;
+
+    Ok(())
+}

+ 0 - 282
dulwich/_pack.c

@@ -1,282 +0,0 @@
-/* 
- * Copyright (C) 2009 Jelmer Vernooij <jelmer@jelmer.uk>
- *
- * Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
- * General Public License as public by the Free Software Foundation; version 2.0
- * or (at your option) any later version. You can redistribute it and/or
- * modify it under the terms of either of these two licenses.
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *
- * You should have received a copy of the licenses; if not, see
- * <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
- * and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
- * License, Version 2.0.
- */
-
-#define PY_SSIZE_T_CLEAN
-#include <Python.h>
-#include <stdint.h>
-
-static PyObject *PyExc_ApplyDeltaError = NULL;
-
-static int py_is_sha(PyObject *sha)
-{
-	if (!PyBytes_CheckExact(sha))
-		return 0;
-
-	if (PyBytes_Size(sha) != 20)
-		return 0;
-
-	return 1;
-}
-
-
-static size_t get_delta_header_size(uint8_t *delta, size_t *index, size_t length)
-{
-	size_t size = 0;
-	size_t i = 0;
-	while ((*index) < length) {
-		size_t cmd = delta[*index];
-		(*index)++;
-		size |= (cmd & ~0x80) << i;
-		i += 7;
-		if (!(cmd & 0x80))
-			break;
-	}
-	return size;
-}
-
-static PyObject *py_chunked_as_string(PyObject *py_buf)
-{
-	if (PyList_Check(py_buf)) {
-		PyObject *sep = PyBytes_FromString("");
-		if (sep == NULL) {
-			PyErr_NoMemory();
-			return NULL;
-		}
-		py_buf = _PyBytes_Join(sep, py_buf);
-		Py_DECREF(sep);
-		if (py_buf == NULL) {
-			PyErr_NoMemory();
-			return NULL;
-		}
-	} else if (PyBytes_Check(py_buf)) {
-		Py_INCREF(py_buf);
-	} else {
-		PyErr_SetString(PyExc_TypeError,
-			"src_buf is not a string or a list of chunks");
-		return NULL;
-	}
-    return py_buf;
-}
-
-static PyObject *py_apply_delta(PyObject *self, PyObject *args)
-{
-	uint8_t *src_buf, *delta;
-	size_t src_buf_len, delta_len;
-	size_t src_size, dest_size;
-	size_t outindex = 0;
-	size_t index;
-	uint8_t *out;
-	PyObject *ret, *py_src_buf, *py_delta, *ret_list;
-
-	if (!PyArg_ParseTuple(args, "OO", &py_src_buf, &py_delta))
-		return NULL;
-
-	py_src_buf = py_chunked_as_string(py_src_buf);
-	if (py_src_buf == NULL)
-		return NULL;
-
-	py_delta = py_chunked_as_string(py_delta);
-	if (py_delta == NULL) {
-		Py_DECREF(py_src_buf);
-		return NULL;
-	}
-
-	src_buf = (uint8_t *)PyBytes_AS_STRING(py_src_buf);
-	src_buf_len = (size_t)PyBytes_GET_SIZE(py_src_buf);
-
-	delta = (uint8_t *)PyBytes_AS_STRING(py_delta);
-	delta_len = (size_t)PyBytes_GET_SIZE(py_delta);
-
-	index = 0;
-	src_size = get_delta_header_size(delta, &index, delta_len);
-	if (src_size != src_buf_len) {
-		PyErr_Format(PyExc_ApplyDeltaError,
-					 "Unexpected source buffer size: %lu vs %ld", src_size, src_buf_len);
-		Py_DECREF(py_src_buf);
-		Py_DECREF(py_delta);
-		return NULL;
-	}
-	dest_size = get_delta_header_size(delta, &index, delta_len);
-	ret = PyBytes_FromStringAndSize(NULL, dest_size);
-	if (ret == NULL) {
-		PyErr_NoMemory();
-		Py_DECREF(py_src_buf);
-		Py_DECREF(py_delta);
-		return NULL;
-	}
-	out = (uint8_t *)PyBytes_AS_STRING(ret);
-	while (index < delta_len) {
-		uint8_t cmd = delta[index];
-		index++;
-		if (cmd & 0x80) {
-			size_t cp_off = 0, cp_size = 0;
-			int i;
-			for (i = 0; i < 4; i++) {
-				if (cmd & (1 << i)) {
-					unsigned x = delta[index];
-					index++;
-					cp_off |= x << (i * 8);
-				}
-			}
-			for (i = 0; i < 3; i++) {
-				if (cmd & (1 << (4+i))) {
-					unsigned x = delta[index];
-					index++;
-					cp_size |= x << (i * 8);
-				}
-			}
-			if (cp_size == 0)
-				cp_size = 0x10000;
-			if (cp_off + cp_size < cp_size ||
-				cp_off + cp_size > src_size ||
-				cp_size > dest_size)
-				break;
-			memcpy(out+outindex, src_buf+cp_off, cp_size);
-			outindex += cp_size;
-			dest_size -= cp_size;
-		} else if (cmd != 0) {
-			if (cmd > dest_size)
-				break;
-			memcpy(out+outindex, delta+index, cmd);
-			outindex += cmd;
-			index += cmd;
-			dest_size -= cmd;
-		} else {
-			PyErr_SetString(PyExc_ApplyDeltaError, "Invalid opcode 0");
-			Py_DECREF(ret);
-			Py_DECREF(py_delta);
-			Py_DECREF(py_src_buf);
-			return NULL;
-		}
-	}
-	Py_DECREF(py_src_buf);
-	Py_DECREF(py_delta);
-
-	if (index != delta_len) {
-		PyErr_SetString(PyExc_ApplyDeltaError, "delta not empty");
-		Py_DECREF(ret);
-		return NULL;
-	}
-
-	if (dest_size != 0) {
-		PyErr_SetString(PyExc_ApplyDeltaError, "dest size incorrect");
-		Py_DECREF(ret);
-		return NULL;
-	}
-
-	ret_list = Py_BuildValue("[N]", ret);
-	if (ret_list == NULL) {
-		Py_DECREF(ret);
-		return NULL;
-	}
-	return ret_list;
-}
-
-static PyObject *py_bisect_find_sha(PyObject *self, PyObject *args)
-{
-	PyObject *unpack_name;
-	char *sha;
-	Py_ssize_t sha_len;
-	int start, end;
-	if (!PyArg_ParseTuple(args, "iiy#O", &start, &end,
-			      &sha, &sha_len, &unpack_name))
-		return NULL;
-
-	if (sha_len != 20) {
-		PyErr_SetString(PyExc_ValueError, "Sha is not 20 bytes long");
-		return NULL;
-	}
-	if (start > end) {
-		PyErr_SetString(PyExc_AssertionError, "start > end");
-		return NULL;
-	}
-
-	while (start <= end) {
-		PyObject *file_sha;
-		Py_ssize_t i = (start + end)/2;
-		int cmp;
-		file_sha = PyObject_CallFunction(unpack_name, "i", i);
-		if (file_sha == NULL) {
-			return NULL;
-		}
-		if (!py_is_sha(file_sha)) {
-			PyErr_SetString(PyExc_TypeError, "unpack_name returned non-sha object");
-			Py_DECREF(file_sha);
-			return NULL;
-		}
-		cmp = memcmp(PyBytes_AS_STRING(file_sha), sha, 20);
-		Py_DECREF(file_sha);
-		if (cmp < 0)
-			start = i + 1;
-		else if (cmp > 0)
-			end = i - 1;
-		else {
-			return PyLong_FromLong(i);
-		}
-	}
-	Py_RETURN_NONE;
-}
-
-
-static PyMethodDef py_pack_methods[] = {
-	{ "apply_delta", (PyCFunction)py_apply_delta, METH_VARARGS, NULL },
-	{ "bisect_find_sha", (PyCFunction)py_bisect_find_sha, METH_VARARGS, NULL },
-	{ NULL, NULL, 0, NULL }
-};
-
-static PyObject *
-moduleinit(void)
-{
-	PyObject *m;
-	PyObject *errors_module;
-
-	static struct PyModuleDef moduledef = {
-	  PyModuleDef_HEAD_INIT,
-	  "_pack",         /* m_name */
-	  NULL,            /* m_doc */
-	  -1,              /* m_size */
-	  py_pack_methods, /* m_methods */
-	  NULL,            /* m_reload */
-	  NULL,            /* m_traverse */
-	  NULL,            /* m_clear*/
-	  NULL,            /* m_free */
-	};
-
-	errors_module = PyImport_ImportModule("dulwich.errors");
-	if (errors_module == NULL)
-		return NULL;
-
-	PyExc_ApplyDeltaError = PyObject_GetAttrString(errors_module, "ApplyDeltaError");
-	Py_DECREF(errors_module);
-	if (PyExc_ApplyDeltaError == NULL)
-		return NULL;
-
-	m = PyModule_Create(&moduledef);
-	if (m == NULL)
-		return NULL;
-
-	return m;
-}
-
-PyMODINIT_FUNC
-PyInit__pack(void)
-{
-	return moduleinit();
-}

+ 2 - 3
dulwich/porcelain.py

@@ -124,7 +124,7 @@ from .refs import (
     LOCAL_TAG_PREFIX,
     _import_remote_refs,
 )
-from .repo import BaseRepo, Repo
+from .repo import BaseRepo, Repo, get_user_identity
 from .server import (
     FileSystemBackend,
     ReceivePackHandler,
@@ -1054,8 +1054,7 @@ def tag_create(
             # Create the tag object
             tag_obj = Tag()
             if author is None:
-                # TODO(jelmer): Don't use repo private method.
-                author = r._get_user_identity(r.get_config_stack())
+                author = get_user_identity(r.get_config_stack())
             tag_obj.tagger = author
             tag_obj.message = message + "\n".encode(encoding)
             tag_obj.name = tag

+ 1 - 1
setup.py

@@ -36,12 +36,12 @@ if "__pypy__" not in sys.modules and sys.platform != "win32":
 optional = os.environ.get("CIBUILDWHEEL", "0") != "1"
 
 ext_modules = [
-    Extension("dulwich._pack", ["dulwich/_pack.c"], optional=optional),
     Extension("dulwich._diff_tree", ["dulwich/_diff_tree.c"], optional=optional),
 ]
 
 rust_extensions = [
     RustExtension("dulwich._objects", "crates/objects/Cargo.toml", binding=Binding.PyO3, optional=True),
+    RustExtension("dulwich._pack", "crates/pack/Cargo.toml", binding=Binding.PyO3, optional=True),
 ]
 
 # Ideally, setuptools would just provide a way to do this