|
@@ -0,0 +1,280 @@
|
|
|
+#!/usr/bin/env python3
|
|
|
+
|
|
|
+"""Fix dulwich history by removing .git directories and updating old timestamps.
|
|
|
+
|
|
|
+Usage: ./fix-history.py <source-branch> <target-branch>
|
|
|
+Example: ./fix-history.py master main
|
|
|
+"""
|
|
|
+
|
|
|
+import sys
|
|
|
+import time
|
|
|
+
|
|
|
+from dulwich.objects import Commit, Tree
|
|
|
+from dulwich.repo import Repo
|
|
|
+
|
|
|
+
|
|
|
+def fix_tree(repo, tree_id, seen_trees=None):
|
|
|
+ """Recursively fix a tree by removing .git entries."""
|
|
|
+ if seen_trees is None:
|
|
|
+ seen_trees = set()
|
|
|
+
|
|
|
+ if tree_id in seen_trees:
|
|
|
+ return tree_id
|
|
|
+ seen_trees.add(tree_id)
|
|
|
+
|
|
|
+ try:
|
|
|
+ tree = repo[tree_id]
|
|
|
+ except KeyError:
|
|
|
+ return tree_id
|
|
|
+
|
|
|
+ if not isinstance(tree, Tree):
|
|
|
+ return tree_id
|
|
|
+
|
|
|
+ # Check if this tree contains .git entries
|
|
|
+ modified = False
|
|
|
+ new_items = []
|
|
|
+
|
|
|
+ for item in tree.items():
|
|
|
+ name, mode, sha = item
|
|
|
+
|
|
|
+ # Skip .git entries
|
|
|
+ if name == b".git":
|
|
|
+ modified = True
|
|
|
+ continue
|
|
|
+
|
|
|
+ # Recursively fix subtrees
|
|
|
+ if mode == 0o040000: # Directory mode
|
|
|
+ new_sha = fix_tree(repo, sha, seen_trees)
|
|
|
+ if new_sha != sha:
|
|
|
+ modified = True
|
|
|
+ sha = new_sha
|
|
|
+
|
|
|
+ new_items.append((name, mode, sha))
|
|
|
+
|
|
|
+ if not modified:
|
|
|
+ return tree_id
|
|
|
+
|
|
|
+ print(f"Removing .git entry from tree {tree_id.decode()}")
|
|
|
+
|
|
|
+ # Create new tree without .git entries
|
|
|
+ new_tree = Tree()
|
|
|
+ for name, mode, sha in new_items:
|
|
|
+ new_tree.add(name, mode, sha)
|
|
|
+
|
|
|
+ repo.object_store.add_object(new_tree)
|
|
|
+ return new_tree.id
|
|
|
+
|
|
|
+
|
|
|
+def fix_commit_dates(commit):
|
|
|
+ """Fix commit dates if they're before 1990."""
|
|
|
+ modified = False
|
|
|
+
|
|
|
+ # Unix timestamp for 1990-01-01
|
|
|
+ min_timestamp = 315532800
|
|
|
+ max_timestamp = int(time.time())
|
|
|
+
|
|
|
+ # Fix author date
|
|
|
+ if commit.author_time < min_timestamp:
|
|
|
+ new_time = commit.author_time * 10
|
|
|
+ if min_timestamp <= new_time <= max_timestamp:
|
|
|
+ print(f"Fixed author date: {commit.author_time} -> {new_time}")
|
|
|
+ commit.author_time = new_time
|
|
|
+ modified = True
|
|
|
+
|
|
|
+ # Fix committer date
|
|
|
+ if commit.commit_time < min_timestamp:
|
|
|
+ new_time = commit.commit_time * 10
|
|
|
+ if min_timestamp <= new_time <= max_timestamp:
|
|
|
+ print(f"Fixed committer date: {commit.commit_time} -> {new_time}")
|
|
|
+ commit.commit_time = new_time
|
|
|
+ modified = True
|
|
|
+
|
|
|
+ return modified
|
|
|
+
|
|
|
+
|
|
|
+def rewrite_history(repo, source_branch, target_branch):
|
|
|
+ """Rewrite history to fix issues."""
|
|
|
+ print(f"=== Rewriting history from {source_branch} to {target_branch} ===")
|
|
|
+
|
|
|
+ # Get the head commit of the source branch
|
|
|
+ try:
|
|
|
+ source_ref = f"refs/heads/{source_branch}".encode()
|
|
|
+ head_sha = repo.refs[source_ref]
|
|
|
+ except KeyError:
|
|
|
+ print(f"Error: Branch '{source_branch}' not found")
|
|
|
+ return False
|
|
|
+
|
|
|
+ # Map old commit SHAs to new ones
|
|
|
+ commit_map = {}
|
|
|
+ tree_map = {}
|
|
|
+
|
|
|
+ # Get all commits in topological order
|
|
|
+ walker = repo.get_walker([head_sha])
|
|
|
+ commits = list(walker)
|
|
|
+ commits.reverse() # Process from oldest to newest
|
|
|
+
|
|
|
+ print(f"Processing {len(commits)} commits...")
|
|
|
+
|
|
|
+ for i, commit_entry in enumerate(commits):
|
|
|
+ old_commit = commit_entry.commit
|
|
|
+
|
|
|
+ if i % 100 == 0:
|
|
|
+ print(f"Processed {i}/{len(commits)} commits...")
|
|
|
+
|
|
|
+ # Fix the tree
|
|
|
+ old_tree_id = old_commit.tree
|
|
|
+ if old_tree_id not in tree_map:
|
|
|
+ tree_map[old_tree_id] = fix_tree(repo, old_tree_id)
|
|
|
+ new_tree_id = tree_map[old_tree_id]
|
|
|
+
|
|
|
+ # Create new commit
|
|
|
+ new_commit = Commit()
|
|
|
+ new_commit.tree = new_tree_id
|
|
|
+ new_commit.author = old_commit.author
|
|
|
+ new_commit.committer = old_commit.committer
|
|
|
+ new_commit.author_time = old_commit.author_time
|
|
|
+ new_commit.commit_time = old_commit.commit_time
|
|
|
+ new_commit.author_timezone = old_commit.author_timezone
|
|
|
+ new_commit.commit_timezone = old_commit.commit_timezone
|
|
|
+ new_commit.message = old_commit.message
|
|
|
+ new_commit.encoding = old_commit.encoding
|
|
|
+ # note: Drop extra fields
|
|
|
+
|
|
|
+ # Fix dates
|
|
|
+ date_modified = fix_commit_dates(new_commit)
|
|
|
+
|
|
|
+ # Map parent commits
|
|
|
+ new_parents = []
|
|
|
+ for parent_sha in old_commit.parents:
|
|
|
+ if parent_sha in commit_map:
|
|
|
+ new_parents.append(commit_map[parent_sha])
|
|
|
+ else:
|
|
|
+ new_parents.append(parent_sha)
|
|
|
+ new_commit.parents = new_parents
|
|
|
+
|
|
|
+ # Check if commit actually changed
|
|
|
+ if (
|
|
|
+ new_tree_id == old_tree_id
|
|
|
+ and not date_modified
|
|
|
+ and new_parents == list(old_commit.parents)
|
|
|
+ ):
|
|
|
+ # No changes needed, reuse old commit
|
|
|
+ commit_map[old_commit.id] = old_commit.id
|
|
|
+ else:
|
|
|
+ # Add new commit to object store
|
|
|
+ repo.object_store.add_object(new_commit)
|
|
|
+ commit_map[old_commit.id] = new_commit.id
|
|
|
+
|
|
|
+ # Update the target branch
|
|
|
+ new_head = commit_map[head_sha]
|
|
|
+ target_ref = f"refs/heads/{target_branch}".encode()
|
|
|
+ repo.refs[target_ref] = new_head
|
|
|
+
|
|
|
+ print(
|
|
|
+ f"✓ Created branch '{target_branch}' with {len([k for k, v in commit_map.items() if k != v])} modified commits"
|
|
|
+ )
|
|
|
+ return True
|
|
|
+
|
|
|
+
|
|
|
+def main():
|
|
|
+ if len(sys.argv) != 3:
|
|
|
+ print(f"Usage: {sys.argv[0]} <source-branch> <target-branch>")
|
|
|
+ print(f"Example: {sys.argv[0]} master main")
|
|
|
+ print("")
|
|
|
+ print(
|
|
|
+ "This will create a new branch <target-branch> with the rewritten history from <source-branch>"
|
|
|
+ )
|
|
|
+ sys.exit(1)
|
|
|
+
|
|
|
+ source_branch = sys.argv[1]
|
|
|
+ target_branch = sys.argv[2]
|
|
|
+
|
|
|
+ print("=== Dulwich History Fix Script ===")
|
|
|
+ print("This script will:")
|
|
|
+ print("1. Remove .git directories from tree objects")
|
|
|
+ print("2. Fix any commits with dates before 1990")
|
|
|
+ print(
|
|
|
+ f"3. Create new branch '{target_branch}' from '{source_branch}' with fixed history"
|
|
|
+ )
|
|
|
+ print("")
|
|
|
+ print(f"Source branch: {source_branch}")
|
|
|
+ print(f"Target branch: {target_branch}")
|
|
|
+ print("")
|
|
|
+
|
|
|
+ # Open the repository
|
|
|
+ try:
|
|
|
+ repo = Repo(".")
|
|
|
+ except Exception as e:
|
|
|
+ print(f"Error: Could not open repository: {e}")
|
|
|
+ sys.exit(1)
|
|
|
+
|
|
|
+ # Check if source branch exists
|
|
|
+ source_ref = f"refs/heads/{source_branch}".encode()
|
|
|
+ if source_ref not in repo.refs:
|
|
|
+ print(f"Error: Source branch '{source_branch}' does not exist")
|
|
|
+ sys.exit(1)
|
|
|
+
|
|
|
+ # Check if target branch already exists
|
|
|
+ target_ref = f"refs/heads/{target_branch}".encode()
|
|
|
+ if target_ref in repo.refs:
|
|
|
+ print(f"Error: Target branch '{target_branch}' already exists")
|
|
|
+ print("Please delete it first or choose a different name")
|
|
|
+ sys.exit(1)
|
|
|
+
|
|
|
+ # Identify problematic trees
|
|
|
+ print("")
|
|
|
+ print("=== Identifying problematic trees ===")
|
|
|
+ bad_trees = []
|
|
|
+ for sha in repo.object_store:
|
|
|
+ obj = repo[sha]
|
|
|
+ if isinstance(obj, Tree):
|
|
|
+ for name, mode, item_sha in obj.items():
|
|
|
+ if name == b".git":
|
|
|
+ bad_trees.append(sha)
|
|
|
+ break
|
|
|
+
|
|
|
+ print(f"Found {len(bad_trees)} trees with .git directories")
|
|
|
+
|
|
|
+ # Check for commits with bad dates
|
|
|
+ print("")
|
|
|
+ print("=== Identifying problematic commits ===")
|
|
|
+ bad_dates = []
|
|
|
+ for sha in repo.object_store:
|
|
|
+ obj = repo[sha]
|
|
|
+ if isinstance(obj, Commit):
|
|
|
+ if obj.commit_time < 315532800 or obj.author_time < 315532800:
|
|
|
+ bad_dates.append(sha)
|
|
|
+
|
|
|
+ print(f"Found {len(bad_dates)} commits with dates before 1990")
|
|
|
+
|
|
|
+ # Rewrite history
|
|
|
+ print("")
|
|
|
+ if not rewrite_history(repo, source_branch, target_branch):
|
|
|
+ sys.exit(1)
|
|
|
+
|
|
|
+ print("")
|
|
|
+ print("=== Complete ===")
|
|
|
+ print(
|
|
|
+ f"Successfully created branch '{target_branch}' with fixed history from '{source_branch}'"
|
|
|
+ )
|
|
|
+ print("")
|
|
|
+ print("Summary of changes:")
|
|
|
+ print("- Removed .git directories from tree objects")
|
|
|
+ print("- Fixed commit timestamps that were before 1990")
|
|
|
+ print(f"- Created clean history in branch '{target_branch}'")
|
|
|
+ print("")
|
|
|
+ print("IMPORTANT NEXT STEPS:")
|
|
|
+ print(f"1. Review the changes: git log --oneline {target_branch}")
|
|
|
+ print(
|
|
|
+ f"2. Compare commit count: git rev-list --count {source_branch} vs git rev-list --count {target_branch}"
|
|
|
+ )
|
|
|
+ print("3. If satisfied, you can:")
|
|
|
+ print(f" - Push the new branch: git push origin {target_branch}")
|
|
|
+ print(" - Set it as default branch on GitHub/GitLab")
|
|
|
+ print(f" - Update local checkout: git checkout {target_branch}")
|
|
|
+ print("")
|
|
|
+ print(f"The original branch '{source_branch}' remains unchanged.")
|
|
|
+
|
|
|
+
|
|
|
+if __name__ == "__main__":
|
|
|
+ main()
|