Pārlūkot izejas kodu

Provide C implementation of tree item sorter.

Jelmer Vernooij 15 gadi atpakaļ
vecāks
revīzija
ca3bea00d2
2 mainītis faili ar 158 papildinājumiem un 31 dzēšanām
  1. 110 10
      dulwich/_objects.c
  2. 48 21
      dulwich/objects.py

+ 110 - 10
dulwich/_objects.c

@@ -18,6 +18,8 @@
  */
 
 #include <Python.h>
+#include <stdlib.h>
+#include <sys/stat.h>
 
 #define bytehex(x) (((x)<0xa)?('0'+(x)):('a'-0xa+(x)))
 
@@ -49,8 +51,8 @@ static PyObject *py_parse_tree(PyObject *self, PyObject *args)
 
 	end = text + len;
 
-    while (text < end) {
-        long mode;
+	while (text < end) {
+		long mode;
 		mode = strtol(text, &text, 8);
 
 		if (*text != ' ') {
@@ -61,7 +63,7 @@ static PyObject *py_parse_tree(PyObject *self, PyObject *args)
 
 		text++;
 
-        namelen = strlen(text);
+		namelen = strlen(text);
 
 		name = PyString_FromStringAndSize(text, namelen);
 		if (name == NULL) {
@@ -69,12 +71,13 @@ static PyObject *py_parse_tree(PyObject *self, PyObject *args)
 			return NULL;
 		}
 
-        item = Py_BuildValue("(lN)", mode, sha_to_pyhex((unsigned char *)text+namelen+1));
-        if (item == NULL) {
-            Py_DECREF(ret);
+		item = Py_BuildValue("(lN)", mode,
+							 sha_to_pyhex((unsigned char *)text+namelen+1));
+		if (item == NULL) {
+			Py_DECREF(ret);
 			Py_DECREF(name);
-            return NULL;
-        }
+			return NULL;
+		}
 		if (PyDict_SetItem(ret, name, item) == -1) {
 			Py_DECREF(ret);
 			Py_DECREF(item);
@@ -84,13 +87,110 @@ static PyObject *py_parse_tree(PyObject *self, PyObject *args)
 		Py_DECREF(item);
 
 		text += namelen+21;
-    }
+	}
+
+	return ret;
+}
+
+struct tree_item {
+	const char *name;
+	int mode;
+	PyObject *tuple;
+};
+
+int cmp_tree_item(const void *_a, const void *_b)
+{
+	const struct tree_item *a = _a, *b = _b;
+	const char *remain_a, *remain_b;
+	int ret, common;
+	if (strlen(a->name) > strlen(b->name)) {
+		common = strlen(b->name);
+		remain_a = a->name + common;
+		remain_b = (S_ISDIR(b->mode)?"/":"");
+	} else if (strlen(b->name) > strlen(a->name)) { 
+		common = strlen(a->name);
+		remain_a = (S_ISDIR(a->mode)?"/":"");
+		remain_b = b->name + common;
+	} else { /* strlen(a->name) == strlen(b->name) */
+		common = 0;
+		remain_a = a->name;
+		remain_b = b->name;
+	}
+	ret = strncmp(a->name, b->name, common);
+	if (ret != 0)
+		return ret;
+	return strcmp(remain_a, remain_b);
+}
+
+static PyObject *py_sorted_tree_items(PyObject *self, PyObject *entries)
+{
+	struct tree_item *qsort_entries;
+	int num, i;
+	PyObject *ret;
+	Py_ssize_t pos = 0; 
+	PyObject *key, *value;
+
+	if (!PyDict_Check(entries)) {
+		PyErr_SetString(PyExc_TypeError, "Argument not a dictionary");
+		return NULL;
+	}
+
+	num = PyDict_Size(entries);
+	qsort_entries = malloc(num * sizeof(struct tree_item));
+	if (qsort_entries == NULL) {
+		PyErr_NoMemory();
+		return NULL;
+	}
+
+	i = 0;
+	while (PyDict_Next(entries, &pos, &key, &value)) {
+		PyObject *py_mode, *py_sha;
+		
+		if (PyTuple_Size(value) != 2) {
+			PyErr_SetString(PyExc_ValueError, "Tuple has invalid size");
+			free(qsort_entries);
+			return NULL;
+		}
+
+		py_mode = PyTuple_GET_ITEM(value, 0);
+		py_sha = PyTuple_GET_ITEM(value, 1);
+		qsort_entries[i].tuple = Py_BuildValue("(OOO)", key, py_mode, py_sha);
+		if (!PyString_CheckExact(key)) {
+			PyErr_SetString(PyExc_TypeError, "Name is not a string");
+			free(qsort_entries);
+			return NULL;
+		}
+		qsort_entries[i].name = PyString_AS_STRING(key);
+		if (!PyInt_CheckExact(py_mode)) {
+			PyErr_SetString(PyExc_TypeError, "Mode is not an int");
+			free(qsort_entries);
+			return NULL;
+		}
+		qsort_entries[i].mode = PyInt_AS_LONG(py_mode);
+		i++;
+	}
+
+	qsort(qsort_entries, num, sizeof(struct tree_item), cmp_tree_item);
+
+	ret = PyList_New(num);
+	if (ret == NULL) {
+		free(qsort_entries);
+		PyErr_NoMemory();
+		return NULL;
+	}
+
+	for (i = 0; i < num; i++) {
+		PyList_SET_ITEM(ret, i, qsort_entries[i].tuple);
+	}
+
+	free(qsort_entries);
 
-    return ret;
+	return ret;
 }
 
 static PyMethodDef py_objects_methods[] = {
 	{ "parse_tree", (PyCFunction)py_parse_tree, METH_VARARGS, NULL },
+	{ "sorted_tree_items", (PyCFunction)py_sorted_tree_items, METH_O, NULL },
 	{ NULL, NULL, 0, NULL }
 };
 

+ 48 - 21
dulwich/objects.py

@@ -53,7 +53,8 @@ TYPE_ID = "type"
 TAGGER_ID = "tagger"
 ENCODING_ID = "encoding"
 
-S_IFGITLINK	= 0160000
+S_IFGITLINK = 0160000
+
 def S_ISGITLINK(m):
     return (stat.S_IFMT(m) == S_IFGITLINK)
 
@@ -421,25 +422,54 @@ class Tag(ShaFile):
 
 
 def parse_tree(text):
+    """Parse a tree text.
+
+    :param text: Serialized text to parse
+    :return: Dictionary with names as keys, (mode, sha) tuples as values
+    """
     ret = {}
     count = 0
     l = len(text)
     while count < l:
         mode_end = text.index(' ', count)
         mode = int(text[count:mode_end], 8)
-
         name_end = text.index('\0', mode_end)
         name = text[mode_end+1:name_end]
-
         count = name_end+21
-
         sha = text[name_end+1:count]
-
         ret[name] = (mode, sha_to_hex(sha))
-
     return ret
 
 
+def serialize_tree(items):
+    """Serialize the items in a tree to a text.
+
+    :param items: Sorted iterable over (name, mode, sha) tuples
+    :return: Serialized tree text
+    """
+    f = StringIO()
+    for name, mode, hexsha in items:
+        f.write("%04o %s\0%s" % (mode, name, hex_to_sha(hexsha)))
+    return f.getvalue()
+
+
+def sorted_tree_items(entries):
+    """Iterate over a tree entries dictionary in the order in which 
+    the items would be serialized.
+
+    :param entries: Dictionary mapping names to (mode, sha) tuples
+    :return: Iterator over (name, mode, sha)
+    """
+    def cmp_entry((name1, value1), (name2, value2)):
+        if stat.S_ISDIR(value1[0]):
+            name1 += "/"
+        if stat.S_ISDIR(value2[0]):
+            name2 += "/"
+        return cmp(name1, name2)
+    for name, entry in sorted(entries.iteritems(), cmp=cmp_entry):
+        yield name, entry[0], entry[1]
+
+
 class Tree(ShaFile):
     """A Git tree object"""
 
@@ -494,19 +524,19 @@ class Tree(ShaFile):
     def entries(self):
         """Return a list of tuples describing the tree entries"""
         self._ensure_parsed()
-        # The order of this is different from iteritems() for historical reasons
-        return [(mode, name, hexsha) for (name, mode, hexsha) in self.iteritems()]
+        # The order of this is different from iteritems() for historical
+        # reasons
+        return [
+            (mode, name, hexsha) for (name, mode, hexsha) in self.iteritems()]
 
     def iteritems(self):
-        def cmp_entry((name1, value1), (name2, value2)):
-            if stat.S_ISDIR(value1[0]):
-                name1 += "/"
-            if stat.S_ISDIR(value2[0]):
-                name2 += "/"
-            return cmp(name1, name2)
+        """Iterate over all entries in the order in which they would be
+        serialized.
+
+        :return: Iterator over (name, mode, sha) tuples
+        """
         self._ensure_parsed()
-        for name, entry in sorted(self._entries.iteritems(), cmp=cmp_entry):
-            yield name, entry[0], entry[1]
+        return sorted_tree_items(self._entries)
 
     def _parse_text(self):
         """Grab the entries in the tree"""
@@ -514,10 +544,7 @@ class Tree(ShaFile):
         self._needs_parsing = False
 
     def serialize(self):
-        f = StringIO()
-        for name, mode, hexsha in self.iteritems():
-            f.write("%04o %s\0%s" % (mode, name, hex_to_sha(hexsha)))
-        self._text = f.getvalue()
+        self._text = serialize_tree(self.iteritems())
         self._needs_serialization = False
 
     def as_pretty_string(self):
@@ -682,6 +709,6 @@ num_type_map = {
 
 try:
     # Try to import C versions
-    from dulwich._objects import parse_tree
+    from dulwich._objects import parse_tree, sorted_tree_items
 except ImportError:
     pass