2
0
Jelmer Vernooij 1 долоо хоног өмнө
parent
commit
17ba938e55
1 өөрчлөгдсөн 280 нэмэгдсэн , 0 устгасан
  1. 280 0
      devscripts/fix-history.py

+ 280 - 0
devscripts/fix-history.py

@@ -0,0 +1,280 @@
+#!/usr/bin/env python3
+
+"""Fix dulwich history by removing .git directories and updating old timestamps.
+
+Usage: ./fix-history.py <source-branch> <target-branch>
+Example: ./fix-history.py master main
+"""
+
+import sys
+import time
+
+from dulwich.objects import Commit, Tree
+from dulwich.repo import Repo
+
+
+def fix_tree(repo, tree_id, seen_trees=None, fixed_trees=None):
+    """Return the id of a version of a tree with all ``.git`` entries removed.
+
+    The tree is walked recursively. When a tree contains an entry named
+    ``.git``, or one of its subtrees had to be rewritten, a replacement tree
+    is written to ``repo.object_store`` and its id is returned. Trees that
+    need no change are reused as they are.
+
+    Args:
+        repo: Repository to read trees from and write rewritten trees to.
+        tree_id: Id of the tree to process.
+        seen_trees: Optional set that collects the id of every tree visited.
+            Kept for backward compatibility; it no longer short-circuits the
+            walk.
+        fixed_trees: Optional dict mapping already processed tree ids to
+            their replacement ids. Passing the same dict to several calls
+            avoids walking shared subtrees again.
+
+    Returns:
+        The id of the cleaned tree, or ``tree_id`` itself when nothing had
+        to change, the object is missing, or the object is not a tree.
+    """
+    if seen_trees is None:
+        seen_trees = set()
+    if fixed_trees is None:
+        fixed_trees = {}
+
+    # The same subtree can be referenced from several places. A repeat visit
+    # must be answered with the *rewritten* id: returning the original id
+    # would leave the .git entry in place for every occurrence but the first.
+    if tree_id in fixed_trees:
+        return fixed_trees[tree_id]
+    seen_trees.add(tree_id)
+
+    try:
+        tree = repo[tree_id]
+    except KeyError:
+        # Object not present in the repository: keep the reference as is.
+        fixed_trees[tree_id] = tree_id
+        return tree_id
+
+    if not isinstance(tree, Tree):
+        fixed_trees[tree_id] = tree_id
+        return tree_id
+
+    removed_git = False
+    subtree_changed = False
+    kept_items = []
+
+    for name, mode, sha in tree.items():
+        # Skip .git entries
+        if name == b".git":
+            removed_git = True
+            continue
+
+        # Recursively fix subtrees
+        if mode == 0o040000:  # Directory mode
+            new_sha = fix_tree(repo, sha, seen_trees, fixed_trees)
+            if new_sha != sha:
+                subtree_changed = True
+                sha = new_sha
+
+        kept_items.append((name, mode, sha))
+
+    if not (removed_git or subtree_changed):
+        fixed_trees[tree_id] = tree_id
+        return tree_id
+
+    # Only report trees that directly held a .git entry; parents that are
+    # rewritten merely because a subtree changed had nothing removed.
+    if removed_git:
+        print(f"Removing .git entry from tree {tree_id.decode()}")
+
+    # Create new tree without .git entries
+    new_tree = Tree()
+    for name, mode, sha in kept_items:
+        new_tree.add(name, mode, sha)
+
+    repo.object_store.add_object(new_tree)
+    fixed_trees[tree_id] = new_tree.id
+    return new_tree.id
+
+
+def fix_commit_dates(commit, min_timestamp=631152000, max_timestamp=None):
+    """Repair author/committer timestamps that lie before 1990.
+
+    A timestamp below ``min_timestamp`` is multiplied by ten (presumably such
+    values were recorded with a digit missing -- confirm against the history
+    being fixed). The corrected value is only applied when it falls between
+    ``min_timestamp`` and ``max_timestamp``; otherwise the field is left
+    untouched.
+
+    Args:
+        commit: Object with writable ``author_time`` and ``commit_time``
+            attributes; it is modified in place.
+        min_timestamp: Oldest plausible Unix timestamp. Defaults to
+            631152000, which is 1990-01-01 00:00:00 UTC. (The previous value,
+            315532800, is 1980-01-01 and did not match the documented
+            1990 cutoff.)
+        max_timestamp: Newest plausible Unix timestamp. Defaults to the
+            current time.
+
+    Returns:
+        ``True`` if at least one timestamp was changed, ``False`` otherwise.
+    """
+    if max_timestamp is None:
+        max_timestamp = int(time.time())
+
+    modified = False
+    # Both fields follow the same rule; the label only affects the message.
+    for attr, label in (("author_time", "author"), ("commit_time", "committer")):
+        old_time = getattr(commit, attr)
+        if old_time >= min_timestamp:
+            continue
+
+        new_time = old_time * 10
+        if min_timestamp <= new_time <= max_timestamp:
+            print(f"Fixed {label} date: {old_time} -> {new_time}")
+            setattr(commit, attr, new_time)
+            modified = True
+
+    return modified
+
+
+def rewrite_history(repo, source_branch, target_branch):
+    """Rewrite the history of ``source_branch`` into ``target_branch``.
+
+    Every commit reachable from the source branch is visited parents-first.
+    Its tree is cleaned with ``fix_tree``, its timestamps are repaired with
+    ``fix_commit_dates`` and its parents are remapped to their rewritten
+    counterparts. Commits that end up unchanged are reused; all others are
+    written to ``repo.object_store`` as new commits. Finally
+    ``refs/heads/<target_branch>`` is pointed at the rewritten head.
+
+    Only tree, author, committer, timestamps, timezones, message and
+    encoding are copied to a rewritten commit; any other commit fields are
+    dropped.
+
+    Args:
+        repo: Repository to operate on.
+        source_branch: Name of the branch to read, without ``refs/heads/``.
+        target_branch: Name of the branch to create or overwrite, without
+            ``refs/heads/``.
+
+    Returns:
+        ``True`` on success, ``False`` if the source branch does not exist.
+    """
+    # Local import: keeps this function self-contained.
+    from dulwich.walk import ORDER_TOPO
+
+    print(f"=== Rewriting history from {source_branch} to {target_branch} ===")
+
+    # Get the head commit of the source branch
+    source_ref = f"refs/heads/{source_branch}".encode()
+    try:
+        head_sha = repo.refs[source_ref]
+    except KeyError:
+        print(f"Error: Branch '{source_branch}' not found")
+        return False
+
+    # Map old commit SHAs to new ones
+    commit_map = {}
+    tree_map = {}
+
+    # The walker's default order is by commit date. The history being fixed
+    # contains bogus dates, so date order can yield a commit after one of its
+    # parents; once reversed, that child would be processed before the parent
+    # has an entry in commit_map. Topological order yields children before
+    # parents, so the reversed list is strictly parents-first.
+    walker = repo.get_walker([head_sha], order=ORDER_TOPO)
+    commits = list(walker)
+    commits.reverse()  # Process from oldest to newest
+    total = len(commits)
+
+    print(f"Processing {total} commits...")
+
+    for i, commit_entry in enumerate(commits):
+        old_commit = commit_entry.commit
+
+        if i % 100 == 0:
+            print(f"Processed {i}/{total} commits...")
+
+        # Fix the tree (cached per root tree, many commits share one)
+        old_tree_id = old_commit.tree
+        if old_tree_id not in tree_map:
+            tree_map[old_tree_id] = fix_tree(repo, old_tree_id)
+        new_tree_id = tree_map[old_tree_id]
+
+        # Create new commit
+        new_commit = Commit()
+        new_commit.tree = new_tree_id
+        new_commit.author = old_commit.author
+        new_commit.committer = old_commit.committer
+        new_commit.author_time = old_commit.author_time
+        new_commit.commit_time = old_commit.commit_time
+        new_commit.author_timezone = old_commit.author_timezone
+        new_commit.commit_timezone = old_commit.commit_timezone
+        new_commit.message = old_commit.message
+        new_commit.encoding = old_commit.encoding
+        # note: Drop extra fields
+
+        # Fix dates
+        date_modified = fix_commit_dates(new_commit)
+
+        # Map parent commits. A parent without an entry (not yielded by the
+        # walker) is referenced by its original id.
+        new_parents = [
+            commit_map.get(parent_sha, parent_sha)
+            for parent_sha in old_commit.parents
+        ]
+        new_commit.parents = new_parents
+
+        # Check if commit actually changed
+        unchanged = (
+            new_tree_id == old_tree_id
+            and not date_modified
+            and new_parents == list(old_commit.parents)
+        )
+        if unchanged:
+            # No changes needed, reuse old commit
+            commit_map[old_commit.id] = old_commit.id
+        else:
+            # Add new commit to object store
+            repo.object_store.add_object(new_commit)
+            commit_map[old_commit.id] = new_commit.id
+
+    # Update the target branch
+    new_head = commit_map[head_sha]
+    target_ref = f"refs/heads/{target_branch}".encode()
+    repo.refs[target_ref] = new_head
+
+    modified_count = sum(1 for old, new in commit_map.items() if old != new)
+    print(
+        f"✓ Created branch '{target_branch}' with {modified_count} modified commits"
+    )
+    return True
+
+
+def main():
+    """Command-line entry point: ``fix-history.py <source-branch> <target-branch>``.
+
+    Opens the repository in the current directory, checks that the source
+    branch exists and the target branch does not, reports how many trees
+    contain ``.git`` entries and how many commits carry dates before 1990,
+    and then calls ``rewrite_history``. Exits with status 1 on a usage
+    error, when the repository cannot be opened, when a branch check fails
+    or when the rewrite fails.
+    """
+    if len(sys.argv) != 3:
+        print(f"Usage: {sys.argv[0]} <source-branch> <target-branch>")
+        print(f"Example: {sys.argv[0]} master main")
+        print("")
+        print(
+            "This will create a new branch <target-branch> with the rewritten history from <source-branch>"
+        )
+        sys.exit(1)
+
+    source_branch = sys.argv[1]
+    target_branch = sys.argv[2]
+
+    print("=== Dulwich History Fix Script ===")
+    print("This script will:")
+    print("1. Remove .git directories from tree objects")
+    print("2. Fix any commits with dates before 1990")
+    print(
+        f"3. Create new branch '{target_branch}' from '{source_branch}' with fixed history"
+    )
+    print("")
+    print(f"Source branch: {source_branch}")
+    print(f"Target branch: {target_branch}")
+    print("")
+
+    # Open the repository
+    try:
+        repo = Repo(".")
+    except Exception as e:
+        print(f"Error: Could not open repository: {e}")
+        sys.exit(1)
+
+    # The context manager closes the repository on every exit path,
+    # including the sys.exit() calls below.
+    with repo:
+        # Check if source branch exists
+        source_ref = f"refs/heads/{source_branch}".encode()
+        if source_ref not in repo.refs:
+            print(f"Error: Source branch '{source_branch}' does not exist")
+            sys.exit(1)
+
+        # Check if target branch already exists
+        target_ref = f"refs/heads/{target_branch}".encode()
+        if target_ref in repo.refs:
+            print(f"Error: Target branch '{target_branch}' already exists")
+            print("Please delete it first or choose a different name")
+            sys.exit(1)
+
+        # 1990-01-01 00:00:00 UTC. (The previously used 315532800 is
+        # 1980-01-01 and did not match the "before 1990" wording.)
+        min_timestamp = 631152000
+
+        # Identify problematic trees and commits in a single pass so that
+        # each object is loaded only once.
+        print("")
+        print("=== Identifying problematic trees ===")
+        bad_trees = []
+        bad_dates = []
+        for sha in repo.object_store:
+            obj = repo[sha]
+            if isinstance(obj, Tree):
+                if any(entry[0] == b".git" for entry in obj.items()):
+                    bad_trees.append(sha)
+            elif isinstance(obj, Commit):
+                if (
+                    obj.commit_time < min_timestamp
+                    or obj.author_time < min_timestamp
+                ):
+                    bad_dates.append(sha)
+
+        print(f"Found {len(bad_trees)} trees with .git directories")
+
+        print("")
+        print("=== Identifying problematic commits ===")
+        print(f"Found {len(bad_dates)} commits with dates before 1990")
+
+        # Rewrite history
+        print("")
+        if not rewrite_history(repo, source_branch, target_branch):
+            sys.exit(1)
+
+    print("")
+    print("=== Complete ===")
+    print(
+        f"Successfully created branch '{target_branch}' with fixed history from '{source_branch}'"
+    )
+    print("")
+    print("Summary of changes:")
+    print("- Removed .git directories from tree objects")
+    print("- Fixed commit timestamps that were before 1990")
+    print(f"- Created clean history in branch '{target_branch}'")
+    print("")
+    print("IMPORTANT NEXT STEPS:")
+    print(f"1. Review the changes: git log --oneline {target_branch}")
+    print(
+        f"2. Compare commit count: git rev-list --count {source_branch} vs git rev-list --count {target_branch}"
+    )
+    print("3. If satisfied, you can:")
+    print(f"   - Push the new branch: git push origin {target_branch}")
+    print("   - Set it as default branch on GitHub/GitLab")
+    print(f"   - Update local checkout: git checkout {target_branch}")
+    print("")
+    print(f"The original branch '{source_branch}' remains unchanged.")
+
+
+# Run the history rewrite only when executed as a script, not on import.
+if __name__ == "__main__":
+    main()