Parcourir la source

Start the python-git project.

Aims to give an interface to git repos that doesn't call out to git directly.
Probably going to be pure python.

Currently can read blobs, trees and commits from the files. It reads both
legacy and new headers. However it is untested for anything but the simple
case.

Can also understand a little about the repository format.

The testsuite uses the nosetests program from Turbogears, as I got annoyed
trying to set up unittest.

Open up a repo by passing it the path to the .git dir. You can then ask for
HEAD with repo.head() or a ref with repo.ref(name). Both return the SHA id
they currently point to. You can then grab this object with
repo.get_object(sha).

For the actual objects, ShaFile.from_file(filename) will return the object
stored in the file, whatever its type. To ensure you get the correct type,
call {Blob,Tree,Commit}.from_file(filename) instead. I will add repo methods
to do this for you with file lookup soon.

There is also support for creating blobs. Blob.from_string(string) will create
a blob object from the string. You can then call blob.sha() to get the sha
object for this blob, and hexdigest() on that will get its ID. There is
currently no method that allows you to write it out though.

Everything is currently done with assertions, where much of it should probably
be exceptions. This was merely done for expediency. If you hit an assertion,
it either means you have done something wrong, there is corruption, or
you are trying an unsupported operation.
James Westby il y a 19 ans
commit
ba318695d2

+ 0 - 0
git/__init__.py


+ 283 - 0
git/objects.py

@@ -0,0 +1,283 @@
+import mmap
+import os
+import sha
+import zlib
+
+# Object type names: used as each class's _type, in the object header built
+# by ShaFile._header(), and as the keys of type_map.
+blob_id = "blob"
+tree_id = "tree"
+commit_id = "commit"
+# Keywords that start the header lines Commit._parse_text() looks for (the
+# first header line of a commit starts with tree_id).
+parent_id = "parent"
+author_id = "author"
+committer_id = "committer"
+
+def _decompress(string):
+    dcomp = zlib.decompressobj()
+    dcomped = dcomp.decompress(string)
+    dcomped += dcomp.flush()
+    return dcomped
+
+def sha_to_hex(sha):
+  """Takes a string and returns the hex of the sha within"""
+  hexsha = ''
+  for c in sha:
+    if ord(c) < 16:
+      hexsha += "0%x" % ord(c)
+    else:
+      hexsha += "%x" % ord(c)
+  assert len(hexsha) == 40, "Incorrect length of sha1 string: %d" % \
+         len(hexsha)
+  return hexsha
+
+class ShaFile(object):
+  """A git SHA file."""
+
+  def _update_contents(self):
+    """Update the _contents from the _text"""
+    self._contents = [ord(c) for c in self._text]
+
+  @classmethod
+  def _parse_legacy_object(cls, map):
+    """Parse a legacy object, creating it and setting object._text"""
+    text = _decompress(map)
+    object = None
+    for posstype in type_map.keys():
+      if text.startswith(posstype):
+        object = type_map[posstype]()
+        text = text[len(posstype):]
+        break
+    assert object is not None, "%s is not a known object type" % text[:9]
+    assert text[0] == ' ', "%s is not a space" % text[0]
+    text = text[1:]
+    size = 0
+    i = 0
+    while text[0] >= '0' and text[0] <= '9':
+      if i > 0 and size == 0:
+        assert False, "Size is not in canonical format"
+      size = (size * 10) + int(text[0])
+      text = text[1:]
+      i += 1
+    object._size = size
+    assert text[0] == "\0", "Size not followed by null"
+    text = text[1:]
+    object._text = text
+    object._update_contents()
+    return object
+
+  @classmethod
+  def _parse_object(cls, map):
+    """Parse a new style object , creating it and setting object._text"""
+    used = 0
+    byte = ord(map[used])
+    used += 1
+    num_type = (byte >> 4) & 7
+    try:
+      object = num_type_map[num_type]()
+    except KeyError:
+      assert False, "Not a known type: %d" % num_type
+    while((byte & 0x80) != 0):
+      byte = ord(map[used])
+      used += 1
+    raw = map[used:]
+    object._text = _decompress(raw)
+    object._update_contents()
+    return object
+
+  @classmethod
+  def _parse_file(cls, map):
+    word = (ord(map[0]) << 8) + ord(map[1])
+    if ord(map[0]) == 0x78 and (word % 31) == 0:
+      return cls._parse_legacy_object(map)
+    else:
+      return cls._parse_object(map)
+
+  def __init__(self):
+    """Don't call this directly"""
+
+  def _parse_text(self):
+    """For subclasses to do initialistion time parsing"""
+
+  @classmethod
+  def from_file(cls, filename):
+    """Get the contents of a SHA file on disk"""
+    size = os.path.getsize(filename)
+    f = open(filename, 'rb+')
+    try:
+      map = mmap.mmap(f.fileno(), size)
+      shafile = cls._parse_file(map)
+      shafile._parse_text()
+      return shafile
+    finally:
+      f.close()
+
+  def _header(self):
+    return "%s %lu\0" % (self._type, len(self._contents))
+
+  def contents(self):
+    """The raw bytes of this object"""
+    return self._contents
+
+  def sha(self):
+    """The SHA1 object that is the name of this object."""
+    ressha = sha.new()
+    ressha.update(self._header())
+    ressha.update(self._text)
+    return ressha
+
+class Blob(ShaFile):
+  """A Git Blob object."""
+
+  _type = blob_id
+
+  def text(self):
+    """The text contained within the blob object."""
+    return self._text
+
+  @classmethod
+  def from_file(cls, filename):
+    blob = ShaFile.from_file(filename)
+    assert blob._type == cls._type, "%s is not a blob object" % filename
+    return blob
+
+  @classmethod
+  def from_string(cls, string):
+    """Create a blob from a string."""
+    shafile = cls()
+    shafile._text = string
+    shafile._update_contents()
+    return shafile
+
+class Tree(ShaFile):
+  """A Git tree object"""
+
+  _type = tree_id
+
+  @classmethod
+  def from_file(cls, filename):
+    tree = ShaFile.from_file(filename)
+    assert tree._type == cls._type, "%s is not a tree object" % filename
+    return tree
+
+  def entries(self):
+    """Reutrn a list of tuples describing the tree entries"""
+    return self._entries
+
+  def _parse_text(self):
+    """Grab the entries in the tree"""
+    self._entries = []
+    count = 0
+    while count < len(self._text):
+      mode = 0
+      chr = self._text[count]
+      while chr != ' ':
+        assert chr >= '0' and chr <= '7', "%s is not a valid mode char" % chr
+        mode = (mode << 3) + (ord(chr) - ord('0'))
+        count += 1
+        chr = self._text[count]
+      count += 1
+      chr = self._text[count]
+      name = ''
+      while chr != '\0':
+        name += chr
+        count += 1
+        chr = self._text[count]
+      count += 1
+      chr = self._text[count]
+      sha = self._text[count:count+20]
+      hexsha = sha_to_hex(sha)
+      self._entries.append((mode, name, hexsha))
+      count = count + 20
+
+class Commit(ShaFile):
+  """A git commit object"""
+
+  _type = commit_id
+
+  @classmethod
+  def from_file(cls, filename):
+    commit = ShaFile.from_file(filename)
+    assert commit._type == cls._type, "%s is not a commit object" % filename
+    return commit
+
+  def _parse_text(self):
+    text = self._text
+    count = 0
+    assert text.startswith(tree_id), "Invlid commit object, " \
+         "must start with %s" % tree_id
+    count += len(tree_id)
+    assert text[count] == ' ', "Invalid commit object, " \
+         "%s must be followed by space not %s" % (tree_id, text[count])
+    count += 1
+    self._tree = text[count:count+40]
+    count = count + 40
+    assert text[count] == "\n", "Invalid commit object, " \
+         "tree sha must be followed by newline"
+    count += 1
+    self._parents = []
+    while text[count:].startswith(parent_id):
+      count += len(parent_id)
+      assert text[count] == ' ', "Invalid commit object, " \
+           "%s must be followed by space not %s" % (parent_id, text[count])
+      count += 1
+      self._parents.append(text[count:count+40])
+      count += 40
+      assert text[count] == "\n", "Invalid commit object, " \
+           "parent sha must be followed by newline"
+      count += 1
+    self._author = None
+    if text[count:].startswith(author_id):
+      count += len(author_id)
+      assert text[count] == ' ', "Invalid commit object, " \
+           "%s must be followed by space not %s" % (author_id, text[count])
+      count += 1
+      self._author = ''
+      while text[count] != '\n':
+        self._author += text[count]
+        count += 1
+      count += 1
+    self._committer = None
+    if text[count:].startswith(committer_id):
+      count += len(committer_id)
+      assert text[count] == ' ', "Invalid commit object, " \
+           "%s must be followed by space not %s" % (committer_id, text[count])
+      count += 1
+      self._committer = ''
+      while text[count] != '\n':
+        self._committer += text[count]
+        count += 1
+      count += 1
+    assert text[count] == '\n', "There must be a new line after the headers"
+    count += 1
+    self._message = text[count:]
+
+  def tree(self):
+    """Returns the tree that is the state of this commit"""
+    return self._tree
+
+  def parents(self):
+    """Return a list of parents of this commit."""
+    return self._parents
+
+  def author(self):
+    """Returns the name of the author of the commit"""
+    return self._author
+
+  def committer(self):
+    """Returns the name of the committer of the commit"""
+    return self._committer
+
+  def message(self):
+    """Returns the commit message"""
+    return self._message
+
+# Maps the type name at the start of a decompressed legacy object to the
+# class to instantiate; used by ShaFile._parse_legacy_object().
+type_map = {
+  blob_id : Blob,
+  tree_id : Tree,
+  commit_id : Commit,
+}
+
+# Maps the type number taken from the first byte of a new style object to the
+# class to instantiate; used by ShaFile._parse_object().
+num_type_map = {
+  1 : Commit,
+  2 : Tree,
+  3 : Blob,
+}
+

+ 52 - 0
git/repository.py

@@ -0,0 +1,52 @@
+import os
+
+from objects import ShaFile
+
+# Name of the directory below the repository's base directory that
+# Repository.object_dir() points at.
+objectdir = 'objects'
+# Prefix of a ref file's contents when it names another ref rather than
+# holding a sha itself.
+symref = 'ref: '
+
+class Repository(object):
+
+  ref_locs = ['', 'refs', 'refs/tags', 'refs/heads', 'refs/remotes']
+
+  def __init__(self, root):
+    self._basedir = root
+
+  def basedir(self):
+    return self._basedir
+
+  def object_dir(self):
+    return os.path.join(self.basedir(), objectdir)
+
+  def _get_ref(self, file):
+    f = open(file, 'rb')
+    try:
+      contents = f.read()
+      if contents.startswith(symref):
+        ref = contents[len(symref):]
+        if ref[-1] == '\n':
+          ref = ref[:-1]
+        return self.ref(ref)
+      assert len(contents) == 41, 'Invalid ref'
+      return contents[:-1]
+    finally:
+      f.close()
+
+  def ref(self, name):
+    for dir in self.ref_locs:
+      file = os.path.join(self.basedir(), dir, name)
+      if os.path.exists(file):
+        return self._get_ref(file)
+
+  def head(self):
+    return self.ref('HEAD')
+
+  def get_object(self, sha):
+    assert len(sha) == 40, "Incorrect sha length"
+    dir = sha[:2]
+    file = sha[2:]
+    path = os.path.join(self.object_dir(), dir, file)
+    if not os.path.exists(path):
+      return None
+    return ShaFile.from_file(path)
+

+ 11 - 0
git/tests/__init__.py

@@ -0,0 +1,11 @@
+import unittest
+import test_objects
+
+def test_suite():
+  test_modules = [test_objects]
+  loader = unittest.TestLoader()
+  suite = unittest.TestSuite()
+  for mod in test_modules:
+    suite.addTest(loader.loadTestsFromModule(mod))
+  return suite
+

BIN
git/tests/data/blobs/6f670c0fb53f9463760b7295fbb814e965fb20c8


BIN
git/tests/data/blobs/954a536f7819d40e6f637f849ee187dd10066349


BIN
git/tests/data/blobs/e69de29bb2d1d6434b8b29ae775ad8c2e48c5391


+ 2 - 0
git/tests/data/commits/0d89f20333fbb1d2f3a94da77f4981373d8f4310

@@ -0,0 +1,2 @@
+x°╔█K
+б@]о)z░Ы53в╝в==-&░▓√ЮМ█Я╬e=╗Бi:вЗ┐н"пZ≈=╒ГH)╒╟╖░╘r┬Х┼■З░°╛║≈>╖╝4хwY╢╪Ат╞M∙ряxИ©|щЯq=┐s)&л6DhЛ6ц{YЕ┤╧m/ЭLФXg?╚

BIN
git/tests/data/commits/5dac377bdded4c9aeb8dff595f0faeebcc8498cc


+ 2 - 0
git/tests/data/commits/60dacdc733de308bb77bb76ce0fb0f9b44c9769e

@@ -0,0 +1,2 @@
+xœ¥ŽË
+Â0E]ç+f/ÈäÑN"®]®g’	¶Ð*5"þ½¯Oð.Ï…ÃÉ—i¸€«¶¨a¶	UB¬E¤ÃRrŸ[’²P´\©öæÊ‹Î
°ÄTz靖-®zN¡0Q
)ZO¾Ä¼EÃ÷v¾,pàIopÒ[“'lÇǺ¨<ïÇ|ñfÖ¶k)P—œGXã{&K›þã0Ç÷?“y´MQ

+ 1 - 0
git/tests/data/repos/a/a

@@ -0,0 +1 @@
+file a

+ 1 - 0
git/tests/data/repos/a/b

@@ -0,0 +1 @@
+file b

+ 1 - 0
git/tests/data/repos/a/c

@@ -0,0 +1 @@
+file c

BIN
git/tests/data/trees/70c190eb48fa8bbb50ddc692a17b44cb781af7f6


+ 92 - 0
git/tests/test_objects.py

@@ -0,0 +1,92 @@
+import os
+import unittest
+
+from git.objects import (Blob,
+                         Tree,
+                         Commit,
+                         )
+
+# Ids of the objects the tests below work with: a_sha, b_sha and c_sha are the
+# blobs whose text is 'test 1\n', 'test 2\n' and 'test 3\n', and tree_sha is
+# the tree read by the tree and commit tests.
+a_sha = '6f670c0fb53f9463760b7295fbb814e965fb20c8'
+b_sha = '2969be3e8ee1c0222396a5611407e4769f14e54b'
+c_sha = '954a536f7819d40e6f637f849ee187dd10066349'
+tree_sha = '70c190eb48fa8bbb50ddc692a17b44cb781af7f6'
+
+class BlobReadTests(unittest.TestCase):
+  """Test decompression of blobs"""
+
+  def get_sha_file(self, obj, base, sha):
+    return obj.from_file(os.path.join(os.path.dirname(__file__),
+                                      'data', base, sha))
+
+  def get_blob(self, sha):
+    """Return the blob named sha from the test data dir"""
+    return self.get_sha_file(Blob, 'blobs', sha)
+
+  def get_tree(self, sha):
+    return self.get_sha_file(Tree, 'trees', sha)
+
+  def get_commit(self, sha):
+    return self.get_sha_file(Commit, 'commits', sha)
+
+  def test_decompress_simple_blob(self):
+    b = self.get_blob(a_sha)
+    self.assertEqual(b.text(), 'test 1\n')
+    self.assertEqual(b.sha().hexdigest(), a_sha)
+
+  def test_parse_empty_blob_object(self):
+    sha = 'e69de29bb2d1d6434b8b29ae775ad8c2e48c5391'
+    b = self.get_blob(sha)
+    self.assertEqual(b.text(), '')
+    self.assertEqual(b.sha().hexdigest(), sha)
+
+  def test_create_blob_from_string(self):
+    string = 'test 2\n'
+    b = Blob.from_string(string)
+    self.assertEqual(b.text(), string)
+    self.assertEqual(b.sha().hexdigest(), b_sha)
+
+  def test_parse_legacy_blob(self):
+    string = 'test 3\n'
+    b = self.get_blob(c_sha)
+    self.assertEqual(b.text(), string)
+    self.assertEqual(b.sha().hexdigest(), c_sha)
+
+  def test_read_tree_from_file(self):
+    t = self.get_tree(tree_sha)
+    self.assertEqual(t.entries()[0], (33188, 'a', a_sha))
+    self.assertEqual(t.entries()[1], (33188, 'b', b_sha))
+
+  def test_read_commit_from_file(self):
+    sha = '60dacdc733de308bb77bb76ce0fb0f9b44c9769e'
+    c = self.get_commit(sha)
+    self.assertEqual(c.tree(), tree_sha)
+    self.assertEqual(c.parents(), ['0d89f20333fbb1d2f3a94da77f4981373d8f4310'])
+    self.assertEqual(c.author(),
+        'James Westby <jw+debian@jameswestby.net> 1174759230 +0000')
+    self.assertEqual(c.committer(),
+        'James Westby <jw+debian@jameswestby.net> 1174759230 +0000')
+    self.assertEqual(c.message(), 'Test commit\n')
+
+  def test_read_commit_no_parents(self):
+    sha = '0d89f20333fbb1d2f3a94da77f4981373d8f4310'
+    c = self.get_commit(sha)
+    self.assertEqual(c.tree(), '90182552c4a85a45ec2a835cadc3451bebdfe870')
+    self.assertEqual(c.parents(), [])
+    self.assertEqual(c.author(),
+        'James Westby <jw+debian@jameswestby.net> 1174758034 +0000')
+    self.assertEqual(c.committer(),
+        'James Westby <jw+debian@jameswestby.net> 1174758034 +0000')
+    self.assertEqual(c.message(), 'Test commit\n')
+
+  def test_read_commit_two_parents(self):
+    sha = '5dac377bdded4c9aeb8dff595f0faeebcc8498cc'
+    c = self.get_commit(sha)
+    self.assertEqual(c.tree(), 'd80c186a03f423a81b39df39dc87fd269736ca86')
+    self.assertEqual(c.parents(), ['ab64bbdcc51b170d21588e5c5d391ee5c0c96dfd',
+                                   '4cffe90e0a41ad3f5190079d7c8f036bde29cbe6'])
+    self.assertEqual(c.author(),
+        'James Westby <jw+debian@jameswestby.net> 1174773719 +0000')
+    self.assertEqual(c.committer(),
+        'James Westby <jw+debian@jameswestby.net> 1174773719 +0000')
+    self.assertEqual(c.message(), 'Merge ../b\n')
+

+ 36 - 0
git/tests/test_repository.py

@@ -0,0 +1,36 @@
+import os
+import unittest
+
+from git.repository import Repository
+
+class RepositoryTests(unittest.TestCase):
+
+  def open_repo(self, name):
+    return Repository(os.path.join(os.path.dirname(__file__),
+                      'data/repos', name, '.git'))
+
+  def test_simple_props(self):
+    r = self.open_repo('a')
+    basedir = os.path.join(os.path.dirname(__file__), 'data/repos/a/.git')
+    self.assertEqual(r.basedir(), basedir)
+    self.assertEqual(r.object_dir(), os.path.join(basedir, 'objects'))
+
+  def test_ref(self):
+    r = self.open_repo('a')
+    self.assertEqual(r.ref('master'),
+                     'a90fa2d900a17e99b433217e988c4eb4a2e9a097')
+
+  def test_head(self):
+    r = self.open_repo('a')
+    self.assertEqual(r.head(), 'a90fa2d900a17e99b433217e988c4eb4a2e9a097')
+
+  def test_get_object(self):
+    r = self.open_repo('a')
+    obj = r.get_object(r.head())
+    self.assertEqual(obj._type, 'commit')
+
+  def test_get_object_non_existant(self):
+    r = self.open_repo('a')
+    obj = r.get_object('b91fa4d900g17e99b433218e988c4eb4a3e9a097')
+    self.assertEqual(obj, None)
+