浏览代码

Change parse_tree to return a list rather than a dict.

For future consistency checks, we will need to ensure that objects in
the tree are stored in sorted order when we parse the tree, so it makes
sense to be able to reuse parse_tree for this purpose.

Also added tests for both the C and Python versions of parse_tree. To
do this in one test run, we also need to hold onto the pure-python
implementation before importing the C version if available.
Dave Borowitz 15 年之前
父节点
当前提交
cb9edefdec
共有 3 个文件被更改,包括 36 次插入10 次删除
  1. 6 4
      dulwich/_objects.c
  2. 11 5
      dulwich/objects.py
  3. 19 1
      dulwich/tests/test_objects.py

+ 6 - 4
dulwich/_objects.c

@@ -44,7 +44,10 @@ static PyObject *py_parse_tree(PyObject *self, PyObject *args)
 	if (!PyArg_ParseTuple(args, "s#", &text, &len))
 	if (!PyArg_ParseTuple(args, "s#", &text, &len))
 		return NULL;
 		return NULL;
 
 
-	ret = PyDict_New();
+	/* TODO: currently this returns a list; if memory usage is a concern,
+	* consider rewriting as a custom iterator object */
+	ret = PyList_New(0);
+
 	if (ret == NULL) {
 	if (ret == NULL) {
 		return NULL;
 		return NULL;
 	}
 	}
@@ -71,19 +74,18 @@ static PyObject *py_parse_tree(PyObject *self, PyObject *args)
 			return NULL;
 			return NULL;
 		}
 		}
 
 
-		item = Py_BuildValue("(lN)", mode,
+		item = Py_BuildValue("(NlN)", name, mode,
 							 sha_to_pyhex((unsigned char *)text+namelen+1));
 							 sha_to_pyhex((unsigned char *)text+namelen+1));
 		if (item == NULL) {
 		if (item == NULL) {
 			Py_DECREF(ret);
 			Py_DECREF(ret);
 			Py_DECREF(name);
 			Py_DECREF(name);
 			return NULL;
 			return NULL;
 		}
 		}
-		if (PyDict_SetItem(ret, name, item) == -1) {
+		if (PyList_Append(ret, item) == -1) {
 			Py_DECREF(ret);
 			Py_DECREF(ret);
 			Py_DECREF(item);
 			Py_DECREF(item);
 			return NULL;
 			return NULL;
 		}
 		}
-		Py_DECREF(name);
 		Py_DECREF(item);
 		Py_DECREF(item);
 
 
 		text += namelen+21;
 		text += namelen+21;

+ 11 - 5
dulwich/objects.py

@@ -447,9 +447,8 @@ def parse_tree(text):
     """Parse a tree text.
     """Parse a tree text.
 
 
     :param text: Serialized text to parse
     :param text: Serialized text to parse
-    :return: Dictionary with names as keys, (mode, sha) tuples as values
+    :yields: tuples of (name, mode, sha)
     """
     """
-    ret = {}
     count = 0
     count = 0
     l = len(text)
     l = len(text)
     while count < l:
     while count < l:
@@ -459,8 +458,7 @@ def parse_tree(text):
         name = text[mode_end+1:name_end]
         name = text[mode_end+1:name_end]
         count = name_end+21
         count = name_end+21
         sha = text[name_end+1:count]
         sha = text[name_end+1:count]
-        ret[name] = (mode, sha_to_hex(sha))
-    return ret
+        yield (name, mode, sha_to_hex(sha))
 
 
 
 
 def serialize_tree(items):
 def serialize_tree(items):
@@ -560,7 +558,11 @@ class Tree(ShaFile):
 
 
     def _deserialize(self, chunks):
     def _deserialize(self, chunks):
         """Grab the entries in the tree"""
         """Grab the entries in the tree"""
-        self._entries = parse_tree("".join(chunks))
+        parsed_entries = parse_tree("".join(chunks))
+        # TODO: list comprehension is for efficiency in the common (small) case;
+        # if memory efficiency in the large case is a concern, use a genexp.
+        self._entries = dict([(n, (m, s)) for n, m, s in parsed_entries])
+        self._needs_parsing = False
 
 
     def _serialize(self):
     def _serialize(self):
         return list(serialize_tree(self.iteritems()))
         return list(serialize_tree(self.iteritems()))
@@ -725,6 +727,10 @@ for cls in OBJECT_CLASSES:
     _TYPE_MAP[cls.type_num] = cls
     _TYPE_MAP[cls.type_num] = cls
 
 
 
 
+
+# Hold on to the pure-python implementations for testing
+_parse_tree_py = parse_tree
+_sorted_tree_items_py = sorted_tree_items
 try:
 try:
     # Try to import C versions
     # Try to import C versions
     from dulwich._objects import parse_tree, sorted_tree_items
     from dulwich._objects import parse_tree, sorted_tree_items

+ 19 - 1
dulwich/tests/test_objects.py

@@ -27,6 +27,8 @@ import os
 import stat
 import stat
 import unittest
 import unittest
 
 
+import nose
+
 from dulwich.objects import (
 from dulwich.objects import (
     Blob,
     Blob,
     Tree,
     Tree,
@@ -35,6 +37,8 @@ from dulwich.objects import (
     format_timezone,
     format_timezone,
     hex_to_sha,
     hex_to_sha,
     parse_timezone,
     parse_timezone,
+    parse_tree,
+    _parse_tree_py,
     )
     )
 
 
 a_sha = '6f670c0fb53f9463760b7295fbb814e965fb20c8'
 a_sha = '6f670c0fb53f9463760b7295fbb814e965fb20c8'
@@ -256,7 +260,7 @@ class CommitDeserializationTests(unittest.TestCase):
         self.assertEquals([('extra-field', 'data')], c.extra)
         self.assertEquals([('extra-field', 'data')], c.extra)
 
 
 
 
-class TreeSerializationTests(unittest.TestCase):
+class TreeTests(unittest.TestCase):
 
 
     def test_simple(self):
     def test_simple(self):
         myhexsha = "d80c186a03f423a81b39df39dc87fd269736ca86"
         myhexsha = "d80c186a03f423a81b39df39dc87fd269736ca86"
@@ -272,6 +276,20 @@ class TreeSerializationTests(unittest.TestCase):
         x["a/c"] = (stat.S_IFDIR, "d80c186a03f423a81b39df39dc87fd269736ca86")
         x["a/c"] = (stat.S_IFDIR, "d80c186a03f423a81b39df39dc87fd269736ca86")
         self.assertEquals(["a.c", "a", "a/c"], [p[0] for p in x.iteritems()])
         self.assertEquals(["a.c", "a", "a/c"], [p[0] for p in x.iteritems()])
 
 
+    def _do_test_parse_tree(self, parse_tree):
+        o = Tree.from_file(os.path.join(os.path.dirname(__file__), 'data',
+                                        'trees', tree_sha))
+        self.assertEquals([('a', 0100644, a_sha), ('b', 0100644, b_sha)],
+                          list(parse_tree(o.as_raw_string())))
+
+    def test_parse_tree(self):
+        self._do_test_parse_tree(_parse_tree_py)
+
+    def test_parse_tree_extension(self):
+        if parse_tree is _parse_tree_py:
+            raise nose.SkipTest('parse_tree extension not found')
+        self._do_test_parse_tree(parse_tree)
+
 
 
 class TagSerializeTests(unittest.TestCase):
 class TagSerializeTests(unittest.TestCase):