Jelajahi Sumber

Change parse_tree to return a list rather than a dict.

For future consistency checks, we will need to ensure that objects in
the tree are stored in sorted order when we parse the tree, so it makes
sense to be able to reuse parse_tree for this purpose.

Also added tests for both the C and Python versions of parse_tree. To
do this in one test run, we also need to hold onto the pure-python
implementation before importing the C version if available.
Dave Borowitz 15 tahun lalu
induk
melakukan
cb9edefdec
3 mengubah file dengan 36 tambahan dan 10 penghapusan
  1. 6 4
      dulwich/_objects.c
  2. 11 5
      dulwich/objects.py
  3. 19 1
      dulwich/tests/test_objects.py

+ 6 - 4
dulwich/_objects.c

@@ -44,7 +44,10 @@ static PyObject *py_parse_tree(PyObject *self, PyObject *args)
 	if (!PyArg_ParseTuple(args, "s#", &text, &len))
 		return NULL;
 
-	ret = PyDict_New();
+	/* TODO: currently this returns a list; if memory usage is a concern,
+	* consider rewriting as a custom iterator object */
+	ret = PyList_New(0);
+
 	if (ret == NULL) {
 		return NULL;
 	}
@@ -71,19 +74,18 @@ static PyObject *py_parse_tree(PyObject *self, PyObject *args)
 			return NULL;
 		}
 
-		item = Py_BuildValue("(lN)", mode,
+		item = Py_BuildValue("(NlN)", name, mode,
 							 sha_to_pyhex((unsigned char *)text+namelen+1));
 		if (item == NULL) {
 			Py_DECREF(ret);
 			Py_DECREF(name);
 			return NULL;
 		}
-		if (PyDict_SetItem(ret, name, item) == -1) {
+		if (PyList_Append(ret, item) == -1) {
 			Py_DECREF(ret);
 			Py_DECREF(item);
 			return NULL;
 		}
-		Py_DECREF(name);
 		Py_DECREF(item);
 
 		text += namelen+21;

+ 11 - 5
dulwich/objects.py

@@ -447,9 +447,8 @@ def parse_tree(text):
     """Parse a tree text.
 
     :param text: Serialized text to parse
-    :return: Dictionary with names as keys, (mode, sha) tuples as values
+    :yields: tuples of (name, mode, sha)
     """
-    ret = {}
     count = 0
     l = len(text)
     while count < l:
@@ -459,8 +458,7 @@ def parse_tree(text):
         name = text[mode_end+1:name_end]
         count = name_end+21
         sha = text[name_end+1:count]
-        ret[name] = (mode, sha_to_hex(sha))
-    return ret
+        yield (name, mode, sha_to_hex(sha))
 
 
 def serialize_tree(items):
@@ -560,7 +558,11 @@ class Tree(ShaFile):
 
     def _deserialize(self, chunks):
         """Grab the entries in the tree"""
-        self._entries = parse_tree("".join(chunks))
+        parsed_entries = parse_tree("".join(chunks))
+        # TODO: list comprehension is for efficiency in the common (small) case;
+        # if memory efficiency in the large case is a concern, use a genexp.
+        self._entries = dict([(n, (m, s)) for n, m, s in parsed_entries])
+        self._needs_parsing = False
 
     def _serialize(self):
         return list(serialize_tree(self.iteritems()))
@@ -725,6 +727,10 @@ for cls in OBJECT_CLASSES:
     _TYPE_MAP[cls.type_num] = cls
 
 
+
+# Hold on to the pure-python implementations for testing
+_parse_tree_py = parse_tree
+_sorted_tree_items_py = sorted_tree_items
 try:
     # Try to import C versions
     from dulwich._objects import parse_tree, sorted_tree_items

+ 19 - 1
dulwich/tests/test_objects.py

@@ -27,6 +27,8 @@ import os
 import stat
 import unittest
 
+import nose
+
 from dulwich.objects import (
     Blob,
     Tree,
@@ -35,6 +37,8 @@ from dulwich.objects import (
     format_timezone,
     hex_to_sha,
     parse_timezone,
+    parse_tree,
+    _parse_tree_py,
     )
 
 a_sha = '6f670c0fb53f9463760b7295fbb814e965fb20c8'
@@ -256,7 +260,7 @@ class CommitDeserializationTests(unittest.TestCase):
         self.assertEquals([('extra-field', 'data')], c.extra)
 
 
-class TreeSerializationTests(unittest.TestCase):
+class TreeTests(unittest.TestCase):
 
     def test_simple(self):
         myhexsha = "d80c186a03f423a81b39df39dc87fd269736ca86"
@@ -272,6 +276,20 @@ class TreeSerializationTests(unittest.TestCase):
         x["a/c"] = (stat.S_IFDIR, "d80c186a03f423a81b39df39dc87fd269736ca86")
         self.assertEquals(["a.c", "a", "a/c"], [p[0] for p in x.iteritems()])
 
+    def _do_test_parse_tree(self, parse_tree):
+        o = Tree.from_file(os.path.join(os.path.dirname(__file__), 'data',
+                                        'trees', tree_sha))
+        self.assertEquals([('a', 0100644, a_sha), ('b', 0100644, b_sha)],
+                          list(parse_tree(o.as_raw_string())))
+
+    def test_parse_tree(self):
+        self._do_test_parse_tree(_parse_tree_py)
+
+    def test_parse_tree_extension(self):
+        if parse_tree is _parse_tree_py:
+            raise nose.SkipTest('parse_tree extension not found')
+        self._do_test_parse_tree(parse_tree)
+
 
 class TagSerializeTests(unittest.TestCase):