Parcourir la source

Start the python-git project.

Aims to give an interface to git repos that doesn't call out to git directly.
Probably going to be pure python.

Currently can read blobs, trees and commits from the files. It reads both
legacy and new headers. However it is untested for anything but the simple
case.

Can also understand a little about the repository format.

The testsuite uses the nosetests program from Turbogears, as I got annoyed
trying to set up unittest.

Open up a repo by passing it the path to the .git dir. You can then ask for
HEAD with repo.head() or a ref with repo.ref(name). Both return the SHA id
they currently point to. You can then grab this object with
repo.get_object(sha).

For the actual objects, ShaFile.from_file(filename) will return the object
stored in the file, whatever its type. To ensure you get the correct type,
call {Blob,Tree,Commit}.from_file(filename). I will add repo methods to do
this for you with file lookup soon.

There is also support for creating blobs. Blob.from_string(string) will create
a blob object from the string. You can then call blob.sha() to get the sha
object for this blob, and hexdigest() on that will get its ID. There is
currently no method that allows you to write it out though.

Everything is currently done with assertions, where much of it should probably
be exceptions. This was merely done for expediency. If you hit an assertion,
it either means you have done something wrong, there is corruption, or
you are trying an unsupported operation.
James Westby il y a 18 ans
commit
a029d7b2cc
39 fichiers modifiés avec 1073 ajouts et 0 suppressions
  1. 0 0
      git/__init__.py
  2. 283 0
      git/objects.py
  3. 52 0
      git/repository.py
  4. 11 0
      git/tests/__init__.py
  5. BIN
      git/tests/data/blobs/6f670c0fb53f9463760b7295fbb814e965fb20c8
  6. BIN
      git/tests/data/blobs/954a536f7819d40e6f637f849ee187dd10066349
  7. BIN
      git/tests/data/blobs/e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
  8. 2 0
      git/tests/data/commits/0d89f20333fbb1d2f3a94da77f4981373d8f4310
  9. BIN
      git/tests/data/commits/5dac377bdded4c9aeb8dff595f0faeebcc8498cc
  10. 2 0
      git/tests/data/commits/60dacdc733de308bb77bb76ce0fb0f9b44c9769e
  11. 1 0
      git/tests/data/repos/a/.git/HEAD
  12. 5 0
      git/tests/data/repos/a/.git/config
  13. 1 0
      git/tests/data/repos/a/.git/description
  14. 15 0
      git/tests/data/repos/a/.git/hooks/applypatch-msg
  15. 22 0
      git/tests/data/repos/a/.git/hooks/commit-msg
  16. 8 0
      git/tests/data/repos/a/.git/hooks/post-commit
  17. 8 0
      git/tests/data/repos/a/.git/hooks/post-update
  18. 15 0
      git/tests/data/repos/a/.git/hooks/pre-applypatch
  19. 71 0
      git/tests/data/repos/a/.git/hooks/pre-commit
  20. 150 0
      git/tests/data/repos/a/.git/hooks/pre-rebase
  21. 285 0
      git/tests/data/repos/a/.git/hooks/update
  22. BIN
      git/tests/data/repos/a/.git/index
  23. 6 0
      git/tests/data/repos/a/.git/info/exclude
  24. 2 0
      git/tests/data/repos/a/.git/logs/HEAD
  25. 2 0
      git/tests/data/repos/a/.git/logs/refs/heads/master
  26. BIN
      git/tests/data/repos/a/.git/objects/2a/72d929692c41d8554c07f6301757ba18a65d91
  27. BIN
      git/tests/data/repos/a/.git/objects/4e/f30bbfe26431a69c3820d3a683df54d688f2ec
  28. BIN
      git/tests/data/repos/a/.git/objects/4f/2e6529203aa6d44b5af6e3292c837ceda003f9
  29. BIN
      git/tests/data/repos/a/.git/objects/7d/9a07d797595ef11344549b8d08198e48c15364
  30. BIN
      git/tests/data/repos/a/.git/objects/a2/96d0bb611188cabb256919f36bc30117cca005
  31. BIN
      git/tests/data/repos/a/.git/objects/a9/0fa2d900a17e99b433217e988c4eb4a2e9a097
  32. BIN
      git/tests/data/repos/a/.git/objects/ff/d47d45845a8f6576491e1edb97e3fe6a850e7f
  33. 1 0
      git/tests/data/repos/a/.git/refs/heads/master
  34. 1 0
      git/tests/data/repos/a/a
  35. 1 0
      git/tests/data/repos/a/b
  36. 1 0
      git/tests/data/repos/a/c
  37. BIN
      git/tests/data/trees/70c190eb48fa8bbb50ddc692a17b44cb781af7f6
  38. 92 0
      git/tests/test_objects.py
  39. 36 0
      git/tests/test_repository.py

+ 0 - 0
git/__init__.py


+ 283 - 0
git/objects.py

@@ -0,0 +1,283 @@
+import mmap
+import os
+import sha
+import zlib
+
+# Object type names used as each class's _type and matched against the
+# start of a legacy object header, followed by the header keywords that
+# Commit._parse_text scans for (it also uses tree_id).
+blob_id = "blob"
+tree_id = "tree"
+commit_id = "commit"
+parent_id = "parent"
+author_id = "author"
+committer_id = "committer"
+
+def _decompress(string):
+    """Decompress a zlib-compressed string and return the result."""
+    dcomp = zlib.decompressobj()
+    dcomped = dcomp.decompress(string)
+    # flush() hands back whatever the decompressor still holds internally.
+    dcomped += dcomp.flush()
+    return dcomped
+
+def sha_to_hex(sha):
+  """Return the hex form of the binary sha held in a string.
+
+  Every character of sha becomes two lowercase hex digits. An assertion
+  fails unless the result is exactly 40 characters long, i.e. unless sha
+  was 20 characters long.
+  """
+  hexsha = ''
+  for c in sha:
+    # Values below 16 format as a single hex digit, so pad them with a zero
+    # to keep two digits per character.
+    if ord(c) < 16:
+      hexsha += "0%x" % ord(c)
+    else:
+      hexsha += "%x" % ord(c)
+  assert len(hexsha) == 40, "Incorrect length of sha1 string: %d" % \
+         len(hexsha)
+  return hexsha
+
+class ShaFile(object):
+  """A git SHA file.
+
+  Base class for Blob, Tree and Commit. Instances are produced by the
+  from_file() class method (or Blob.from_string()), which fill in _text
+  and _contents, rather than by calling the constructor directly.
+  """
+
+  def _update_contents(self):
+    """Rebuild _contents as the list of character ordinals of _text."""
+    self._contents = [ord(c) for c in self._text]
+
+  @classmethod
+  def _parse_legacy_object(cls, map):
+    """Parse a legacy object, creating it and setting object._text.
+
+    The whole of map is decompressed and is expected to consist of a type
+    name found in type_map, a space, a decimal size, a NUL character and
+    then the body. The type name selects the class to instantiate, the
+    size is stored in _size and the body in _text. Malformed input trips
+    an assertion.
+    """
+    text = _decompress(map)
+    object = None
+    for posstype in type_map.keys():
+      if text.startswith(posstype):
+        object = type_map[posstype]()
+        text = text[len(posstype):]
+        break
+    assert object is not None, "%s is not a known object type" % text[:9]
+    assert text[0] == ' ', "%s is not a space" % text[0]
+    text = text[1:]
+    size = 0
+    i = 0
+    while text[0] >= '0' and text[0] <= '9':
+      # A further digit while the accumulated size is still zero means the
+      # size was written with a leading zero.
+      if i > 0 and size == 0:
+        assert False, "Size is not in canonical format"
+      size = (size * 10) + int(text[0])
+      text = text[1:]
+      i += 1
+    # The declared size is recorded but not compared with len(text) here.
+    object._size = size
+    assert text[0] == "\0", "Size not followed by null"
+    text = text[1:]
+    object._text = text
+    object._update_contents()
+    return object
+
+  @classmethod
+  def _parse_object(cls, map):
+    """Parse a new style object, creating it and setting object._text.
+
+    Bits 4-6 of the first byte give the numeric type looked up in
+    num_type_map. Further header bytes are skipped for as long as the byte
+    just read has its high bit set; everything after them is decompressed
+    into _text. An unknown type number trips an assertion.
+    """
+    used = 0
+    byte = ord(map[used])
+    used += 1
+    num_type = (byte >> 4) & 7
+    try:
+      object = num_type_map[num_type]()
+    except KeyError:
+      assert False, "Not a known type: %d" % num_type
+    # NOTE(review): the skipped bytes presumably encode the object size;
+    # their value is discarded and _size is not set on this path.
+    while((byte & 0x80) != 0):
+      byte = ord(map[used])
+      used += 1
+    raw = map[used:]
+    object._text = _decompress(raw)
+    object._update_contents()
+    return object
+
+  @classmethod
+  def _parse_file(cls, map):
+    """Pick the legacy or new style parser from the first two bytes.
+
+    The data is handled as a legacy object when the first byte is 0x78 and
+    the first two bytes, read as a big-endian 16-bit number, are a multiple
+    of 31; otherwise it is parsed as a new style object.
+    """
+    # NOTE(review): this looks like the zlib stream header check described
+    # in RFC 1950 - confirm.
+    word = (ord(map[0]) << 8) + ord(map[1])
+    if ord(map[0]) == 0x78 and (word % 31) == 0:
+      return cls._parse_legacy_object(map)
+    else:
+      return cls._parse_object(map)
+
+  def __init__(self):
+    """Don't call this directly"""
+
+  def _parse_text(self):
+    """Hook for subclasses to parse _text once it is loaded; no-op here."""
+
+  @classmethod
+  def from_file(cls, filename):
+    """Get the contents of a SHA file on disk.
+
+    The file is memory-mapped, parsed by _parse_file() and the resulting
+    object is then asked to parse its own text via _parse_text(). The class
+    of the returned object is chosen from the file contents, not from cls.
+    """
+    size = os.path.getsize(filename)
+    # NOTE(review): 'rb+' opens the file for reading and writing although
+    # nothing visible here writes to it, and the mmap object is never
+    # closed explicitly (only the file is).
+    f = open(filename, 'rb+')
+    try:
+      map = mmap.mmap(f.fileno(), size)
+      shafile = cls._parse_file(map)
+      shafile._parse_text()
+      return shafile
+    finally:
+      f.close()
+
+  def _header(self):
+    """Return the type name, a space, the content length and a NUL."""
+    return "%s %lu\0" % (self._type, len(self._contents))
+
+  def contents(self):
+    """The raw bytes of this object, as a list of integer ordinals."""
+    return self._contents
+
+  def sha(self):
+    """The SHA1 object that is the name of this object.
+
+    The digest covers the header from _header() followed by _text. The
+    hash object itself is returned, not its hex digest.
+    """
+    ressha = sha.new()
+    ressha.update(self._header())
+    ressha.update(self._text)
+    return ressha
+
+class Blob(ShaFile):
+  """A Git Blob object."""
+
+  _type = blob_id
+
+  def text(self):
+    """The text contained within the blob object."""
+    return self._text
+
+  @classmethod
+  def from_file(cls, filename):
+    """Load filename with ShaFile.from_file() and assert it is a blob."""
+    blob = ShaFile.from_file(filename)
+    assert blob._type == cls._type, "%s is not a blob object" % filename
+    return blob
+
+  @classmethod
+  def from_string(cls, string):
+    """Create a blob whose text is the given string."""
+    shafile = cls()
+    shafile._text = string
+    shafile._update_contents()
+    return shafile
+
+class Tree(ShaFile):
+  """A Git tree object"""
+
+  _type = tree_id
+
+  @classmethod
+  def from_file(cls, filename):
+    """Load filename with ShaFile.from_file() and assert it is a tree."""
+    tree = ShaFile.from_file(filename)
+    assert tree._type == cls._type, "%s is not a tree object" % filename
+    return tree
+
+  def entries(self):
+    """Return a list of (mode, name, hexsha) tuples, one per tree entry."""
+    return self._entries
+
+  def _parse_text(self):
+    """Grab the entries in the tree.
+
+    Each entry is read as octal mode digits, a space, the name, a NUL and
+    then 20 raw sha characters, and is stored in _entries as a
+    (mode, name, hexsha) tuple. A non-octal mode character trips an
+    assertion.
+    """
+    self._entries = []
+    count = 0
+    while count < len(self._text):
+      mode = 0
+      # NOTE(review): the local name chr shadows the built-in of that name.
+      chr = self._text[count]
+      while chr != ' ':
+        assert chr >= '0' and chr <= '7', "%s is not a valid mode char" % chr
+        # Accumulate the mode one octal digit (three bits) at a time.
+        mode = (mode << 3) + (ord(chr) - ord('0'))
+        count += 1
+        chr = self._text[count]
+      count += 1
+      chr = self._text[count]
+      name = ''
+      while chr != '\0':
+        name += chr
+        count += 1
+        chr = self._text[count]
+      # Step over the NUL; the next 20 characters are the binary sha.
+      count += 1
+      # The value read here is overwritten before it is used again.
+      chr = self._text[count]
+      sha = self._text[count:count+20]
+      hexsha = sha_to_hex(sha)
+      self._entries.append((mode, name, hexsha))
+      count = count + 20
+
+class Commit(ShaFile):
+  """A git commit object"""
+
+  _type = commit_id
+
+  @classmethod
+  def from_file(cls, filename):
+    """Load filename with ShaFile.from_file() and assert it is a commit."""
+    commit = ShaFile.from_file(filename)
+    assert commit._type == cls._type, "%s is not a commit object" % filename
+    return commit
+
+  def _parse_text(self):
+    """Split _text into tree, parents, author, committer and message.
+
+    The text must begin with a tree line holding a 40-character id. Any
+    number of parent lines, each with a 40-character id, may follow, then
+    an optional author line and an optional committer line. An empty line
+    ends the headers and the remainder is the message. Any deviation trips
+    an assertion.
+    """
+    text = self._text
+    # count is the index of the next unread character in text.
+    count = 0
+    assert text.startswith(tree_id), "Invlid commit object, " \
+         "must start with %s" % tree_id
+    count += len(tree_id)
+    assert text[count] == ' ', "Invalid commit object, " \
+         "%s must be followed by space not %s" % (tree_id, text[count])
+    count += 1
+    self._tree = text[count:count+40]
+    count = count + 40
+    assert text[count] == "\n", "Invalid commit object, " \
+         "tree sha must be followed by newline"
+    count += 1
+    self._parents = []
+    # Collect every consecutive parent line.
+    while text[count:].startswith(parent_id):
+      count += len(parent_id)
+      assert text[count] == ' ', "Invalid commit object, " \
+           "%s must be followed by space not %s" % (parent_id, text[count])
+      count += 1
+      self._parents.append(text[count:count+40])
+      count += 40
+      assert text[count] == "\n", "Invalid commit object, " \
+           "parent sha must be followed by newline"
+      count += 1
+    # _author stays None when there is no author line.
+    self._author = None
+    if text[count:].startswith(author_id):
+      count += len(author_id)
+      assert text[count] == ' ', "Invalid commit object, " \
+           "%s must be followed by space not %s" % (author_id, text[count])
+      count += 1
+      self._author = ''
+      # Everything up to the end of the line is kept verbatim.
+      while text[count] != '\n':
+        self._author += text[count]
+        count += 1
+      count += 1
+    # _committer stays None when there is no committer line.
+    self._committer = None
+    if text[count:].startswith(committer_id):
+      count += len(committer_id)
+      assert text[count] == ' ', "Invalid commit object, " \
+           "%s must be followed by space not %s" % (committer_id, text[count])
+      count += 1
+      self._committer = ''
+      while text[count] != '\n':
+        self._committer += text[count]
+        count += 1
+      count += 1
+    assert text[count] == '\n', "There must be a new line after the headers"
+    count += 1
+    self._message = text[count:]
+
+  def tree(self):
+    """Returns the 40-character tree id read from the tree line"""
+    return self._tree
+
+  def parents(self):
+    """Return a list of the 40-character parent ids of this commit."""
+    return self._parents
+
+  def author(self):
+    """Returns the text of the author line, or None if there was none"""
+    return self._author
+
+  def committer(self):
+    """Returns the text of the committer line, or None if there was none"""
+    return self._committer
+
+  def message(self):
+    """Returns the commit message"""
+    return self._message
+
+# Maps the type name at the start of a legacy object to its class; used by
+# ShaFile._parse_legacy_object.
+type_map = {
+  blob_id : Blob,
+  tree_id : Tree,
+  commit_id : Commit,
+}
+
+# Maps the numeric type taken from the first byte of a new style object to
+# its class; used by ShaFile._parse_object.
+num_type_map = {
+  1 : Commit,
+  2 : Tree,
+  3 : Blob,
+}
+

+ 52 - 0
git/repository.py

@@ -0,0 +1,52 @@
+import os
+
+from objects import ShaFile
+
+# Name of the object directory, joined onto the repository base directory.
+objectdir = 'objects'
+# Prefix that marks a ref file as pointing at another ref by name.
+symref = 'ref: '
+
+class Repository(object):
+  """Read-only view of a git repository rooted at a given directory."""
+
+  # Directories, relative to the base directory, searched in order by ref().
+  ref_locs = ['', 'refs', 'refs/tags', 'refs/heads', 'refs/remotes']
+
+  def __init__(self, root):
+    """Remember root as the base directory; nothing is read yet."""
+    self._basedir = root
+
+  def basedir(self):
+    """Return the base directory given to the constructor."""
+    return self._basedir
+
+  def object_dir(self):
+    """Return the path of the objects directory under the base directory."""
+    return os.path.join(self.basedir(), objectdir)
+
+  def _get_ref(self, file):
+    """Read a ref file and return the sha it resolves to.
+
+    If the contents start with the symref prefix, the named ref is looked
+    up through ref(). Otherwise the contents must be exactly 41 characters
+    long and are returned without the last character.
+    """
+    f = open(file, 'rb')
+    try:
+      contents = f.read()
+      if contents.startswith(symref):
+        ref = contents[len(symref):]
+        if ref[-1] == '\n':
+          ref = ref[:-1]
+        return self.ref(ref)
+      # presumably 40 hex digits plus a trailing newline - not checked here
+      assert len(contents) == 41, 'Invalid ref'
+      return contents[:-1]
+    finally:
+      f.close()
+
+  def ref(self, name):
+    """Return the sha that the ref called name points to.
+
+    Each directory in ref_locs is tried in turn and the first existing
+    file is used. Falls off the end, returning None, if no file is found.
+    """
+    for dir in self.ref_locs:
+      file = os.path.join(self.basedir(), dir, name)
+      if os.path.exists(file):
+        return self._get_ref(file)
+
+  def head(self):
+    """Return the sha that HEAD currently resolves to."""
+    return self.ref('HEAD')
+
+  def get_object(self, sha):
+    """Return the object named by a 40-character sha, or None if absent.
+
+    The file is looked up under the object directory, in a subdirectory
+    named after the first two characters of sha, with the remaining 38 as
+    the file name. It is loaded with ShaFile.from_file().
+    """
+    assert len(sha) == 40, "Incorrect sha length"
+    dir = sha[:2]
+    file = sha[2:]
+    path = os.path.join(self.object_dir(), dir, file)
+    if not os.path.exists(path):
+      return None
+    return ShaFile.from_file(path)
+

+ 11 - 0
git/tests/__init__.py

@@ -0,0 +1,11 @@
+import unittest
+import test_objects
+
+def test_suite():
+  """Return a unittest suite holding the tests of each listed module."""
+  # Only test_objects is listed here.
+  test_modules = [test_objects]
+  loader = unittest.TestLoader()
+  suite = unittest.TestSuite()
+  for mod in test_modules:
+    suite.addTest(loader.loadTestsFromModule(mod))
+  return suite
+

BIN
git/tests/data/blobs/6f670c0fb53f9463760b7295fbb814e965fb20c8


BIN
git/tests/data/blobs/954a536f7819d40e6f637f849ee187dd10066349


BIN
git/tests/data/blobs/e69de29bb2d1d6434b8b29ae775ad8c2e48c5391


+ 2 - 0
git/tests/data/commits/0d89f20333fbb1d2f3a94da77f4981373d8f4310

@@ -0,0 +1,2 @@
+x°╔█K
+б@]о)z░Ы53в╝в==-&░▓√ЮМ█Я╬e=╗Бi:вЗ┐н"пZ≈=╒ГH)╒╟╖░╘r┬Х┼■З░°╛║≈>╖╝4хwY╢╪Ат╞M∙ряxИ©|щЯq=┐s)&л6DhЛ6ц{YЕ┤╧m/ЭLФXg?╚

BIN
git/tests/data/commits/5dac377bdded4c9aeb8dff595f0faeebcc8498cc


+ 2 - 0
git/tests/data/commits/60dacdc733de308bb77bb76ce0fb0f9b44c9769e

@@ -0,0 +1,2 @@
+xœ¥ŽË
+Â0E]ç+f/ÈäÑN"®]®g’	¶Ð*5"þ½¯Oð.Ï…ÃÉ—i¸€«¶¨a¶	UB¬E¤ÃRrŸ[’²P´\©öæÊ‹Î
°ÄTz靖-®zN¡0Q
)ZO¾Ä¼EÃ÷v¾,pàIopÒ[“'lÇǺ¨<ïÇ|ñfÖ¶k)P—œGXã{&K›þã0Ç÷?“y´MQ

+ 1 - 0
git/tests/data/repos/a/.git/HEAD

@@ -0,0 +1 @@
+ref: refs/heads/master

+ 5 - 0
git/tests/data/repos/a/.git/config

@@ -0,0 +1,5 @@
+[core]
+	repositoryformatversion = 0
+	filemode = true
+	bare = false
+	logallrefupdates = true

+ 1 - 0
git/tests/data/repos/a/.git/description

@@ -0,0 +1 @@
+Unnamed repository; edit this file to name it for gitweb.

+ 15 - 0
git/tests/data/repos/a/.git/hooks/applypatch-msg

@@ -0,0 +1,15 @@
+#!/bin/sh
+#
+# An example hook script to check the commit log message taken by
+# applypatch from an e-mail message.
+#
+# The hook should exit with non-zero status after issuing an
+# appropriate message if it wants to stop the commit.  The hook is
+# allowed to edit the commit message file.
+#
+# To enable this hook, make this file executable.
+
+. git-sh-setup
+test -x "$GIT_DIR/hooks/commit-msg" &&
+	exec "$GIT_DIR/hooks/commit-msg" ${1+"$@"}
+:

+ 22 - 0
git/tests/data/repos/a/.git/hooks/commit-msg

@@ -0,0 +1,22 @@
+#!/bin/sh
+#
+# An example hook script to check the commit log message.
+# Called by git-commit with one argument, the name of the file
+# that has the commit message.  The hook should exit with non-zero
+# status after issuing an appropriate message if it wants to stop the
+# commit.  The hook is allowed to edit the commit message file.
+#
+# To enable this hook, make this file executable.
+
+# Uncomment the below to add a Signed-off-by line to the message.
+# SOB=$(git var GIT_AUTHOR_IDENT | sed -n 's/^\(.*>\).*$/Signed-off-by: \1/p')
+# grep -qs "^$SOB" "$1" || echo "$SOB" >> "$1"
+
+# This example catches duplicate Signed-off-by lines.
+
+test "" = "$(grep '^Signed-off-by: ' "$1" |
+	 sort | uniq -c | sed -e '/^[ 	]*1[ 	]/d')" || {
+	echo >&2 Duplicate Signed-off-by lines.
+	exit 1
+}
+

+ 8 - 0
git/tests/data/repos/a/.git/hooks/post-commit

@@ -0,0 +1,8 @@
+#!/bin/sh
+#
+# An example hook script that is called after a successful
+# commit is made.
+#
+# To enable this hook, make this file executable.
+
+: Nothing

+ 8 - 0
git/tests/data/repos/a/.git/hooks/post-update

@@ -0,0 +1,8 @@
+#!/bin/sh
+#
+# An example hook script to prepare a packed repository for use over
+# dumb transports.
+#
+# To enable this hook, make this file executable by "chmod +x post-update".
+
+exec git-update-server-info

+ 15 - 0
git/tests/data/repos/a/.git/hooks/pre-applypatch

@@ -0,0 +1,15 @@
+#!/bin/sh
+#
+# An example hook script to verify what is about to be committed
+# by applypatch from an e-mail message.
+#
+# The hook should exit with non-zero status after issuing an
+# appropriate message if it wants to stop the commit.
+#
+# To enable this hook, make this file executable.
+
+. git-sh-setup
+test -x "$GIT_DIR/hooks/pre-commit" &&
+	exec "$GIT_DIR/hooks/pre-commit" ${1+"$@"}
+:
+

+ 71 - 0
git/tests/data/repos/a/.git/hooks/pre-commit

@@ -0,0 +1,71 @@
+#!/bin/sh
+#
+# An example hook script to verify what is about to be committed.
+# Called by git-commit with no arguments.  The hook should
+# exit with non-zero status after issuing an appropriate message if
+# it wants to stop the commit.
+#
+# To enable this hook, make this file executable.
+
+# This is slightly modified from Andrew Morton's Perfect Patch.
+# Lines you introduce should not have trailing whitespace.
+# Also check for an indentation that has SP before a TAB.
+
+if git-rev-parse --verify HEAD 2>/dev/null
+then
+	git-diff-index -p -M --cached HEAD
+else
+	# NEEDSWORK: we should produce a diff with an empty tree here
+	# if we want to do the same verification for the initial import.
+	:
+fi |
+perl -e '
+    my $found_bad = 0;
+    my $filename;
+    my $reported_filename = "";
+    my $lineno;
+    sub bad_line {
+	my ($why, $line) = @_;
+	if (!$found_bad) {
+	    print STDERR "*\n";
+	    print STDERR "* You have some suspicious patch lines:\n";
+	    print STDERR "*\n";
+	    $found_bad = 1;
+	}
+	if ($reported_filename ne $filename) {
+	    print STDERR "* In $filename\n";
+	    $reported_filename = $filename;
+	}
+	print STDERR "* $why (line $lineno)\n";
+	print STDERR "$filename:$lineno:$line\n";
+    }
+    while (<>) {
+	if (m|^diff --git a/(.*) b/\1$|) {
+	    $filename = $1;
+	    next;
+	}
+	if (/^@@ -\S+ \+(\d+)/) {
+	    $lineno = $1 - 1;
+	    next;
+	}
+	if (/^ /) {
+	    $lineno++;
+	    next;
+	}
+	if (s/^\+//) {
+	    $lineno++;
+	    chomp;
+	    if (/\s$/) {
+		bad_line("trailing whitespace", $_);
+	    }
+	    if (/^\s* 	/) {
+		bad_line("indent SP followed by a TAB", $_);
+	    }
+	    if (/^(?:[<>=]){7}/) {
+		bad_line("unresolved merge conflict", $_);
+	    }
+	}
+    }
+    exit($found_bad);
+'
+

+ 150 - 0
git/tests/data/repos/a/.git/hooks/pre-rebase

@@ -0,0 +1,150 @@
+#!/bin/sh
+#
+# Copyright (c) 2006 Junio C Hamano
+#
+
+publish=next
+basebranch="$1"
+if test "$#" = 2
+then
+	topic="refs/heads/$2"
+else
+	topic=`git symbolic-ref HEAD`
+fi
+
+case "$basebranch,$topic" in
+master,refs/heads/??/*)
+	;;
+*)
+	exit 0 ;# we do not interrupt others.
+	;;
+esac
+
+# Now we are dealing with a topic branch being rebased
+# on top of master.  Is it OK to rebase it?
+
+# Is topic fully merged to master?
+not_in_master=`git-rev-list --pretty=oneline ^master "$topic"`
+if test -z "$not_in_master"
+then
+	echo >&2 "$topic is fully merged to master; better remove it."
+	exit 1 ;# we could allow it, but there is no point.
+fi
+
+# Is topic ever merged to next?  If so you should not be rebasing it.
+only_next_1=`git-rev-list ^master "^$topic" ${publish} | sort`
+only_next_2=`git-rev-list ^master           ${publish} | sort`
+if test "$only_next_1" = "$only_next_2"
+then
+	not_in_topic=`git-rev-list "^$topic" master`
+	if test -z "$not_in_topic"
+	then
+		echo >&2 "$topic is already up-to-date with master"
+		exit 1 ;# we could allow it, but there is no point.
+	else
+		exit 0
+	fi
+else
+	not_in_next=`git-rev-list --pretty=oneline ^${publish} "$topic"`
+	perl -e '
+		my $topic = $ARGV[0];
+		my $msg = "* $topic has commits already merged to public branch:\n";
+		my (%not_in_next) = map {
+			/^([0-9a-f]+) /;
+			($1 => 1);
+		} split(/\n/, $ARGV[1]);
+		for my $elem (map {
+				/^([0-9a-f]+) (.*)$/;
+				[$1 => $2];
+			} split(/\n/, $ARGV[2])) {
+			if (!exists $not_in_next{$elem->[0]}) {
+				if ($msg) {
+					print STDERR $msg;
+					undef $msg;
+				}
+				print STDERR " $elem->[1]\n";
+			}
+		}
+	' "$topic" "$not_in_next" "$not_in_master"
+	exit 1
+fi
+
+exit 0
+
+################################################################
+
+This sample hook safeguards topic branches that have been
+published from being rewound.
+
+The workflow assumed here is:
+
+ * Once a topic branch forks from "master", "master" is never
+   merged into it again (either directly or indirectly).
+
+ * Once a topic branch is fully cooked and merged into "master",
+   it is deleted.  If you need to build on top of it to correct
+   earlier mistakes, a new topic branch is created by forking at
+   the tip of the "master".  This is not strictly necessary, but
+   it makes it easier to keep your history simple.
+
+ * Whenever you need to test or publish your changes to topic
+   branches, merge them into "next" branch.
+
+The script, being an example, hardcodes the publish branch name
+to be "next", but it is trivial to make it configurable via
+$GIT_DIR/config mechanism.
+
+With this workflow, you would want to know:
+
+(1) ... if a topic branch has ever been merged to "next".  Young
+    topic branches can have stupid mistakes you would rather
+    clean up before publishing, and things that have not been
+    merged into other branches can be easily rebased without
+    affecting other people.  But once it is published, you would
+    not want to rewind it.
+
+(2) ... if a topic branch has been fully merged to "master".
+    Then you can delete it.  More importantly, you should not
+    build on top of it -- other people may already want to
+    change things related to the topic as patches against your
+    "master", so if you need further changes, it is better to
+    fork the topic (perhaps with the same name) afresh from the
+    tip of "master".
+
+Let's look at this example:
+
+		   o---o---o---o---o---o---o---o---o---o "next"
+		  /       /           /           /
+		 /   a---a---b A     /           /
+		/   /               /           /
+	       /   /   c---c---c---c B         /
+	      /   /   /             \         /
+	     /   /   /   b---b C     \       /
+	    /   /   /   /             \     /
+    ---o---o---o---o---o---o---o---o---o---o---o "master"
+
+
+A, B and C are topic branches.
+
+ * A has one fix since it was merged up to "next".
+
+ * B has finished.  It has been fully merged up to "master" and "next",
+   and is ready to be deleted.
+
+ * C has not merged to "next" at all.
+
+We would want to allow C to be rebased, refuse A, and encourage
+B to be deleted.
+
+To compute (1):
+
+	git-rev-list ^master ^topic next
+	git-rev-list ^master        next
+
+	if these match, topic has not merged in next at all.
+
+To compute (2):
+
+	git-rev-list master..topic
+
+	if this is empty, it is fully merged to "master".

+ 285 - 0
git/tests/data/repos/a/.git/hooks/update

@@ -0,0 +1,285 @@
+#!/bin/sh
+#
+# An example hook script to mail out commit update information.
+# It can also blocks tags that aren't annotated.
+# Called by git-receive-pack with arguments: refname sha1-old sha1-new
+#
+# To enable this hook, make this file executable by "chmod +x update".
+#
+# Config
+# ------
+# hooks.mailinglist
+#   This is the list that all pushes will go to; leave it blank to not send
+#   emails frequently.  The log email will list every log entry in full between
+#   the old ref value and the new ref value.
+# hooks.announcelist
+#   This is the list that all pushes of annotated tags will go to.  Leave it
+#   blank to just use the mailinglist field.  The announce emails list the
+#   short log summary of the changes since the last annotated tag
+# hooks.allowunannotated
+#   This boolean sets whether unannotated tags will be allowed into the
+#   repository.  By default they won't be.
+#
+# Notes
+# -----
+# All emails have their subjects prefixed with "[SCM]" to aid filtering.
+# All emails include the headers "X-Git-Refname", "X-Git-Oldrev",
+# "X-Git-Newrev", and "X-Git-Reftype" to enable fine tuned filtering and info.
+
+# --- Constants
+EMAILPREFIX="[SCM] "
+LOGBEGIN="- Log -----------------------------------------------------------------"
+LOGEND="-----------------------------------------------------------------------"
+DATEFORMAT="%F %R %z"
+
+# --- Command line
+refname="$1"
+oldrev="$2"
+newrev="$3"
+
+# --- Safety check
+if [ -z "$GIT_DIR" ]; then
+	echo "Don't run this script from the command line." >&2
+	echo " (if you want, you could supply GIT_DIR then run" >&2
+	echo "  $0 <ref> <oldrev> <newrev>)" >&2
+	exit 1
+fi
+
+if [ -z "$refname" -o -z "$oldrev" -o -z "$newrev" ]; then
+	echo "Usage: $0 <ref> <oldrev> <newrev>" >&2
+	exit 1
+fi
+
+# --- Config
+projectdesc=$(cat $GIT_DIR/description)
+recipients=$(git-repo-config hooks.mailinglist)
+announcerecipients=$(git-repo-config hooks.announcelist)
+allowunannotated=$(git-repo-config --bool hooks.allowunannotated)
+
+# --- Check types
+newrev_type=$(git-cat-file -t "$newrev")
+
+case "$refname","$newrev_type" in
+	refs/tags/*,commit)
+		# un-annotated tag
+		refname_type="tag"
+		short_refname=${refname##refs/tags/}
+		if [ "$allowunannotated" != "true" ]; then
+			echo "*** The un-annotated tag, $short_refname is not allowed in this repository" >&2
+			echo "*** Use 'git tag [ -a | -s ]' for tags you want to propagate." >&2
+			exit 1
+		fi
+		;;
+	refs/tags/*,tag)
+		# annotated tag
+		refname_type="annotated tag"
+		short_refname=${refname##refs/tags/}
+		# change recipients
+		if [ -n "$announcerecipients" ]; then
+			recipients="$announcerecipients"
+		fi
+		;;
+	refs/heads/*,commit)
+		# branch
+		refname_type="branch"
+		short_refname=${refname##refs/heads/}
+		;;
+	refs/remotes/*,commit)
+		# tracking branch
+		refname_type="tracking branch"
+		short_refname=${refname##refs/remotes/}
+		# Should this even be allowed?
+		echo "*** Push-update of tracking branch, $refname.  No email generated." >&2
+		exit 0
+		;;
+	*)
+		# Anything else (is there anything else?)
+		echo "*** Update hook: unknown type of update, \"$newrev_type\", to ref $refname" >&2
+		exit 1
+		;;
+esac
+
+# Check if we've got anyone to send to
+if [ -z "$recipients" ]; then
+	# If the email isn't sent, then at least give the user some idea of what command
+	# would generate the email at a later date
+	echo "*** No recipients found - no email will be sent, but the push will continue" >&2
+	echo "*** for $0 $1 $2 $3" >&2
+	exit 0
+fi
+
+# --- Email parameters
+committer=$(git show --pretty=full -s $newrev | grep "^Commit: " | sed -e "s/^Commit: //")
+describe=$(git describe $newrev 2>/dev/null)
+if [ -z "$describe" ]; then
+	describe=$newrev
+fi
+
+# --- Email (all stdout will be the email)
+(
+# Generate header
+cat <<-EOF
+From: $committer
+To: $recipients
+Subject: ${EMAILPREFIX}$projectdesc $refname_type, $short_refname now at $describe
+X-Git-Refname: $refname
+X-Git-Reftype: $refname_type
+X-Git-Oldrev: $oldrev
+X-Git-Newrev: $newrev
+
+Hello,
+
+This is an automated email from the git hooks/update script, it was
+generated because a ref change was pushed to the repository.
+
+Updating $refname_type, $short_refname,
+EOF
+
+case "$refname_type" in
+	"tracking branch"|branch)
+		if expr "$oldrev" : '0*$' >/dev/null
+		then
+			# If the old reference is "0000..0000" then this is a new branch
+			# and so oldrev is not valid
+			echo "  as a new  $refname_type"
+		    echo "        to  $newrev ($newrev_type)"
+			echo ""
+			echo $LOGBEGIN
+			# This shows all log entries that are not already covered by
+			# another ref - i.e. commits that are now accessible from this
+			# ref that were previously not accessible
+			git-rev-parse --not --all | git-rev-list --stdin --pretty $newref
+			echo $LOGEND
+		else
+			# oldrev is valid
+			oldrev_type=$(git-cat-file -t "$oldrev")
+
+			# Now the problem is for cases like this:
+			#   * --- * --- * --- * (oldrev)
+			#          \
+			#           * --- * --- * (newrev)
+			# i.e. there is no guarantee that newrev is a strict subset
+			# of oldrev - (would have required a force, but that's allowed).
+			# So, we can't simply say rev-list $oldrev..$newrev.  Instead
+			# we find the common base of the two revs and list from there
+			baserev=$(git-merge-base $oldrev $newrev)
+
+			# Commit with a parent
+			for rev in $(git-rev-list $newrev ^$baserev)
+			do
+				revtype=$(git-cat-file -t "$rev")
+				echo "       via  $rev ($revtype)"
+			done
+			if [ "$baserev" = "$oldrev" ]; then
+				echo "      from  $oldrev ($oldrev_type)"
+			else
+				echo "  based on  $baserev"
+				echo "      from  $oldrev ($oldrev_type)"
+				echo ""
+				echo "This ref update crossed a branch point; i.e. the old rev is not a strict subset"
+				echo "of the new rev.  This occurs, when you --force push a change in a situation"
+				echo "like this:"
+				echo ""
+				echo " * -- * -- B -- O -- O -- O ($oldrev)"
+				echo "            \\"
+				echo "             N -- N -- N ($newrev)"
+				echo ""
+				echo "Therefore, we assume that you've already had alert emails for all of the O"
+				echo "revisions, and now give you all the revisions in the N branch from the common"
+				echo "base, B ($baserev), up to the new revision."
+			fi
+			echo ""
+			echo $LOGBEGIN
+			git-rev-list --pretty $newrev ^$baserev
+			echo $LOGEND
+			echo ""
+			echo "Diffstat:"
+			git-diff-tree --no-color --stat -M -C --find-copies-harder $newrev ^$baserev
+		fi
+		;;
+	"annotated tag")
+		# Should we allow changes to annotated tags?
+		if expr "$oldrev" : '0*$' >/dev/null
+		then
+			# If the old reference is "0000..0000" then this is a new atag
+			# and so oldrev is not valid
+			echo "        to  $newrev ($newrev_type)"
+		else
+			echo "        to  $newrev ($newrev_type)"
+			echo "      from  $oldrev"
+		fi
+
+		# If this tag succeeds another, then show which tag it replaces
+		prevtag=$(git describe $newrev^ 2>/dev/null | sed 's/-g.*//')
+		if [ -n "$prevtag" ]; then
+			echo "  replaces  $prevtag"
+		fi
+
+		# Read the tag details
+		eval $(git cat-file tag $newrev | \
+			sed -n '4s/tagger \([^>]*>\)[^0-9]*\([0-9]*\).*/tagger="\1" ts="\2"/p')
+		tagged=$(date --date="1970-01-01 00:00:00 +0000 $ts seconds" +"$DATEFORMAT")
+
+		echo " tagged by  $tagger"
+		echo "        on  $tagged"
+
+		echo ""
+		echo $LOGBEGIN
+		echo ""
+
+		if [ -n "$prevtag" ]; then
+			git rev-list --pretty=short "$prevtag..$newrev" | git shortlog
+		else
+			git rev-list --pretty=short $newrev | git shortlog
+		fi
+
+		echo $LOGEND
+		echo ""
+		;;
+	*)
+		# By default, unannotated tags aren't allowed in; if
+		# they are though, it's debatable whether we would even want an
+		# email to be generated; however, I don't want to add another config
+		# option just for that.
+		#
+		# Unannotated tags are more about marking a point than releasing
+		# a version; therefore we don't do the shortlog summary that we
+		# do for annotated tags above - we simply show that the point has
+		# been marked, and print the log message for the marked point for
+		# reference purposes
+		#
+		# Note this section also catches any other reference type (although
+		# there aren't any) and deals with them in the same way.
+		if expr "$oldrev" : '0*$' >/dev/null
+		then
+			# If the old reference is "0000..0000" then this is a new tag
+			# and so oldrev is not valid
+			echo "  as a new  $refname_type"
+			echo "        to  $newrev ($newrev_type)"
+		else
+			echo "        to  $newrev ($newrev_type)"
+			echo "      from  $oldrev"
+		fi
+		echo ""
+		echo $LOGBEGIN
+		git-show --no-color --root -s $newrev
+		echo $LOGEND
+		echo ""
+		;;
+esac
+
+# Footer
+cat <<-EOF
+
+hooks/update
+---
+Git Source Code Management System
+$0 $1 \\
+  $2 \\
+  $3
+EOF
+#) | cat >&2
+) | /usr/sbin/sendmail -t
+
+# --- Finished
+exit 0

BIN
git/tests/data/repos/a/.git/index


+ 6 - 0
git/tests/data/repos/a/.git/info/exclude

@@ -0,0 +1,6 @@
+# git-ls-files --others --exclude-from=.git/info/exclude
+# Lines that start with '#' are comments.
+# For a project mostly in C, the following would be a good set of
+# exclude patterns (uncomment them if you want to use them):
+# *.[oa]
+# *~

+ 2 - 0
git/tests/data/repos/a/.git/logs/HEAD

@@ -0,0 +1,2 @@
+0000000000000000000000000000000000000000 2a72d929692c41d8554c07f6301757ba18a65d91 James Westby <jw+debian@jameswestby.net> 1174775029 +0000	commit (initial): Add a and b
+2a72d929692c41d8554c07f6301757ba18a65d91 a90fa2d900a17e99b433217e988c4eb4a2e9a097 James Westby <jw+debian@jameswestby.net> 1174775047 +0000	commit: Add c

+ 2 - 0
git/tests/data/repos/a/.git/logs/refs/heads/master

@@ -0,0 +1,2 @@
+0000000000000000000000000000000000000000 2a72d929692c41d8554c07f6301757ba18a65d91 James Westby <jw+debian@jameswestby.net> 1174775029 +0000	commit (initial): Add a and b
+2a72d929692c41d8554c07f6301757ba18a65d91 a90fa2d900a17e99b433217e988c4eb4a2e9a097 James Westby <jw+debian@jameswestby.net> 1174775047 +0000	commit: Add c

BIN
git/tests/data/repos/a/.git/objects/2a/72d929692c41d8554c07f6301757ba18a65d91


BIN
git/tests/data/repos/a/.git/objects/4e/f30bbfe26431a69c3820d3a683df54d688f2ec


BIN
git/tests/data/repos/a/.git/objects/4f/2e6529203aa6d44b5af6e3292c837ceda003f9


BIN
git/tests/data/repos/a/.git/objects/7d/9a07d797595ef11344549b8d08198e48c15364


BIN
git/tests/data/repos/a/.git/objects/a2/96d0bb611188cabb256919f36bc30117cca005


BIN
git/tests/data/repos/a/.git/objects/a9/0fa2d900a17e99b433217e988c4eb4a2e9a097


BIN
git/tests/data/repos/a/.git/objects/ff/d47d45845a8f6576491e1edb97e3fe6a850e7f


+ 1 - 0
git/tests/data/repos/a/.git/refs/heads/master

@@ -0,0 +1 @@
+a90fa2d900a17e99b433217e988c4eb4a2e9a097

+ 1 - 0
git/tests/data/repos/a/a

@@ -0,0 +1 @@
+file a

+ 1 - 0
git/tests/data/repos/a/b

@@ -0,0 +1 @@
+file b

+ 1 - 0
git/tests/data/repos/a/c

@@ -0,0 +1 @@
+file c

BIN
git/tests/data/trees/70c190eb48fa8bbb50ddc692a17b44cb781af7f6


+ 92 - 0
git/tests/test_objects.py

@@ -0,0 +1,92 @@
+import os
+import unittest
+
+from git.objects import (Blob,
+                         Tree,
+                         Commit,
+                         )
+
+a_sha = '6f670c0fb53f9463760b7295fbb814e965fb20c8'
+b_sha = '2969be3e8ee1c0222396a5611407e4769f14e54b'
+c_sha = '954a536f7819d40e6f637f849ee187dd10066349'
+tree_sha = '70c190eb48fa8bbb50ddc692a17b44cb781af7f6'
+
+class BlobReadTests(unittest.TestCase):
+  """Test reading blobs, trees and commits from fixture files, and Blob.from_string"""
+
+  def get_sha_file(self, obj, base, sha):  # obj must provide from_file(); base is the subdir of data/
+    return obj.from_file(os.path.join(os.path.dirname(__file__),
+                                      'data', base, sha))
+
+  def get_blob(self, sha):
+    """Return the Blob loaded from data/blobs/<sha> next to this test file"""
+    return self.get_sha_file(Blob, 'blobs', sha)
+
+  def get_tree(self, sha):  # like get_blob, but loads data/trees/<sha> via Tree
+    return self.get_sha_file(Tree, 'trees', sha)
+
+  def get_commit(self, sha):  # like get_blob, but loads data/commits/<sha> via Commit
+    return self.get_sha_file(Commit, 'commits', sha)
+
+  def test_decompress_simple_blob(self):  # content and recomputed sha must both match the fixture
+    b = self.get_blob(a_sha)
+    self.assertEqual(b.text(), 'test 1\n')
+    self.assertEqual(b.sha().hexdigest(), a_sha)
+
+  def test_parse_empty_blob_object(self):  # blob whose text is the empty string
+    sha = 'e69de29bb2d1d6434b8b29ae775ad8c2e48c5391'
+    b = self.get_blob(sha)
+    self.assertEqual(b.text(), '')
+    self.assertEqual(b.sha().hexdigest(), sha)
+
+  def test_create_blob_from_string(self):  # built in memory; no fixture file is read
+    string = 'test 2\n'
+    b = Blob.from_string(string)
+    self.assertEqual(b.text(), string)
+    self.assertEqual(b.sha().hexdigest(), b_sha)
+
+  def test_parse_legacy_blob(self):  # NOTE(review): fixture presumably uses the legacy header - confirm
+    string = 'test 3\n'
+    b = self.get_blob(c_sha)
+    self.assertEqual(b.text(), string)
+    self.assertEqual(b.sha().hexdigest(), c_sha)
+
+  def test_read_tree_from_file(self):  # entries look like (mode, name, sha) tuples
+    t = self.get_tree(tree_sha)
+    self.assertEqual(t.entries()[0], (33188, 'a', a_sha))  # 33188 == 0o100644
+    self.assertEqual(t.entries()[1], (33188, 'b', b_sha))
+
+  def test_read_commit_from_file(self):  # commit with exactly one parent
+    sha = '60dacdc733de308bb77bb76ce0fb0f9b44c9769e'
+    c = self.get_commit(sha)
+    self.assertEqual(c.tree(), tree_sha)
+    self.assertEqual(c.parents(), ['0d89f20333fbb1d2f3a94da77f4981373d8f4310'])
+    self.assertEqual(c.author(),
+        'James Westby <jw+debian@jameswestby.net> 1174759230 +0000')
+    self.assertEqual(c.committer(),
+        'James Westby <jw+debian@jameswestby.net> 1174759230 +0000')
+    self.assertEqual(c.message(), 'Test commit\n')
+
+  def test_read_commit_no_parents(self):  # parents() must be an empty list, not None
+    sha = '0d89f20333fbb1d2f3a94da77f4981373d8f4310'
+    c = self.get_commit(sha)
+    self.assertEqual(c.tree(), '90182552c4a85a45ec2a835cadc3451bebdfe870')
+    self.assertEqual(c.parents(), [])
+    self.assertEqual(c.author(),
+        'James Westby <jw+debian@jameswestby.net> 1174758034 +0000')
+    self.assertEqual(c.committer(),
+        'James Westby <jw+debian@jameswestby.net> 1174758034 +0000')
+    self.assertEqual(c.message(), 'Test commit\n')
+
+  def test_read_commit_two_parents(self):  # both parents are expected, in this order
+    sha = '5dac377bdded4c9aeb8dff595f0faeebcc8498cc'
+    c = self.get_commit(sha)
+    self.assertEqual(c.tree(), 'd80c186a03f423a81b39df39dc87fd269736ca86')
+    self.assertEqual(c.parents(), ['ab64bbdcc51b170d21588e5c5d391ee5c0c96dfd',
+                                   '4cffe90e0a41ad3f5190079d7c8f036bde29cbe6'])
+    self.assertEqual(c.author(),
+        'James Westby <jw+debian@jameswestby.net> 1174773719 +0000')
+    self.assertEqual(c.committer(),
+        'James Westby <jw+debian@jameswestby.net> 1174773719 +0000')
+    self.assertEqual(c.message(), 'Merge ../b\n')
+

+ 36 - 0
git/tests/test_repository.py

@@ -0,0 +1,36 @@
+import os
+import unittest
+
+from git.repository import Repository
+
+class RepositoryTests(unittest.TestCase):  # exercises Repository against the fixtures in data/repos
+
+  def open_repo(self, name):  # opens data/repos/<name>/.git located next to this test file
+    return Repository(os.path.join(os.path.dirname(__file__),
+                      'data/repos', name, '.git'))
+
+  def test_simple_props(self):  # basedir() is the path passed in; object_dir() is its 'objects' subdir
+    r = self.open_repo('a')
+    basedir = os.path.join(os.path.dirname(__file__), 'data/repos/a/.git')
+    self.assertEqual(r.basedir(), basedir)
+    self.assertEqual(r.object_dir(), os.path.join(basedir, 'objects'))
+
+  def test_ref(self):  # ref('master') returns the sha as a hex string
+    r = self.open_repo('a')
+    self.assertEqual(r.ref('master'),
+                     'a90fa2d900a17e99b433217e988c4eb4a2e9a097')
+
+  def test_head(self):  # in repo 'a', head() gives the same sha as ref('master')
+    r = self.open_repo('a')
+    self.assertEqual(r.head(), 'a90fa2d900a17e99b433217e988c4eb4a2e9a097')
+
+  def test_get_object(self):  # the object HEAD points at must load as a commit
+    r = self.open_repo('a')
+    obj = r.get_object(r.head())
+    self.assertEqual(obj._type, 'commit')
+
+  def test_get_object_non_existant(self):  # unknown ids yield None rather than raising
+    r = self.open_repo('a')
+    obj = r.get_object('b91fa4d900g17e99b433218e988c4eb4a3e9a097')  # contains 'g': not valid hex
+    self.assertEqual(obj, None)
+