#!/usr/bin/env python3 """Fix dulwich history by removing .git directories and updating old timestamps. Usage: ./fix-history.py Example: ./fix-history.py master main """ import sys import time from dulwich.objects import Commit, Tree from dulwich.repo import Repo def fix_tree(repo, tree_id, seen_trees=None): """Recursively fix a tree by removing .git entries.""" if seen_trees is None: seen_trees = set() if tree_id in seen_trees: return tree_id seen_trees.add(tree_id) try: tree = repo[tree_id] except KeyError: return tree_id if not isinstance(tree, Tree): return tree_id # Check if this tree contains .git entries modified = False new_items = [] for item in tree.items(): name, mode, sha = item # Skip .git entries if name == b".git": modified = True continue # Recursively fix subtrees if mode == 0o040000: # Directory mode new_sha = fix_tree(repo, sha, seen_trees) if new_sha != sha: modified = True sha = new_sha new_items.append((name, mode, sha)) if not modified: return tree_id print(f"Removing .git entry from tree {tree_id.decode()}") # Create new tree without .git entries new_tree = Tree() for name, mode, sha in new_items: new_tree.add(name, mode, sha) repo.object_store.add_object(new_tree) return new_tree.id def fix_commit_dates(commit): """Fix commit dates if they're before 1990.""" modified = False # Unix timestamp for 1990-01-01 min_timestamp = 315532800 max_timestamp = int(time.time()) # Fix author date if commit.author_time < min_timestamp: new_time = commit.author_time * 10 if min_timestamp <= new_time <= max_timestamp: print(f"Fixed author date: {commit.author_time} -> {new_time}") commit.author_time = new_time modified = True # Fix committer date if commit.commit_time < min_timestamp: new_time = commit.commit_time * 10 if min_timestamp <= new_time <= max_timestamp: print(f"Fixed committer date: {commit.commit_time} -> {new_time}") commit.commit_time = new_time modified = True return modified def rewrite_history(repo, source_branch, target_branch): """Rewrite history to fix issues.""" print(f"=== Rewriting history from {source_branch} to {target_branch} ===") # Get the head commit of the source branch try: source_ref = f"refs/heads/{source_branch}".encode() head_sha = repo.refs[source_ref] except KeyError: print(f"Error: Branch '{source_branch}' not found") return False # Map old commit SHAs to new ones commit_map = {} tree_map = {} # Get all commits in topological order walker = repo.get_walker([head_sha]) commits = list(walker) commits.reverse() # Process from oldest to newest print(f"Processing {len(commits)} commits...") for i, commit_entry in enumerate(commits): old_commit = commit_entry.commit if i % 100 == 0: print(f"Processed {i}/{len(commits)} commits...") # Fix the tree old_tree_id = old_commit.tree if old_tree_id not in tree_map: tree_map[old_tree_id] = fix_tree(repo, old_tree_id) new_tree_id = tree_map[old_tree_id] # Create new commit new_commit = Commit() new_commit.tree = new_tree_id new_commit.author = old_commit.author new_commit.committer = old_commit.committer new_commit.author_time = old_commit.author_time new_commit.commit_time = old_commit.commit_time new_commit.author_timezone = old_commit.author_timezone new_commit.commit_timezone = old_commit.commit_timezone new_commit.message = old_commit.message new_commit.encoding = old_commit.encoding # note: Drop extra fields # Fix dates date_modified = fix_commit_dates(new_commit) # Map parent commits new_parents = [] for parent_sha in old_commit.parents: if parent_sha in commit_map: new_parents.append(commit_map[parent_sha]) else: new_parents.append(parent_sha) new_commit.parents = new_parents # Check if commit actually changed if ( new_tree_id == old_tree_id and not date_modified and new_parents == list(old_commit.parents) ): # No changes needed, reuse old commit commit_map[old_commit.id] = old_commit.id else: # Add new commit to object store repo.object_store.add_object(new_commit) commit_map[old_commit.id] = new_commit.id # Update the target branch new_head = commit_map[head_sha] target_ref = f"refs/heads/{target_branch}".encode() repo.refs[target_ref] = new_head print( f"✓ Created branch '{target_branch}' with {len([k for k, v in commit_map.items() if k != v])} modified commits" ) return True def main(): if len(sys.argv) != 3: print(f"Usage: {sys.argv[0]} ") print(f"Example: {sys.argv[0]} master main") print("") print( "This will create a new branch with the rewritten history from " ) sys.exit(1) source_branch = sys.argv[1] target_branch = sys.argv[2] print("=== Dulwich History Fix Script ===") print("This script will:") print("1. Remove .git directories from tree objects") print("2. Fix any commits with dates before 1990") print( f"3. Create new branch '{target_branch}' from '{source_branch}' with fixed history" ) print("") print(f"Source branch: {source_branch}") print(f"Target branch: {target_branch}") print("") # Open the repository try: repo = Repo(".") except Exception as e: print(f"Error: Could not open repository: {e}") sys.exit(1) # Check if source branch exists source_ref = f"refs/heads/{source_branch}".encode() if source_ref not in repo.refs: print(f"Error: Source branch '{source_branch}' does not exist") sys.exit(1) # Check if target branch already exists target_ref = f"refs/heads/{target_branch}".encode() if target_ref in repo.refs: print(f"Error: Target branch '{target_branch}' already exists") print("Please delete it first or choose a different name") sys.exit(1) # Identify problematic trees print("") print("=== Identifying problematic trees ===") bad_trees = [] for sha in repo.object_store: obj = repo[sha] if isinstance(obj, Tree): for name, mode, item_sha in obj.items(): if name == b".git": bad_trees.append(sha) break print(f"Found {len(bad_trees)} trees with .git directories") # Check for commits with bad dates print("") print("=== Identifying problematic commits ===") bad_dates = [] for sha in repo.object_store: obj = repo[sha] if isinstance(obj, Commit): if obj.commit_time < 315532800 or obj.author_time < 315532800: bad_dates.append(sha) print(f"Found {len(bad_dates)} commits with dates before 1990") # Rewrite history print("") if not rewrite_history(repo, source_branch, target_branch): sys.exit(1) print("") print("=== Complete ===") print( f"Successfully created branch '{target_branch}' with fixed history from '{source_branch}'" ) print("") print("Summary of changes:") print("- Removed .git directories from tree objects") print("- Fixed commit timestamps that were before 1990") print(f"- Created clean history in branch '{target_branch}'") print("") print("IMPORTANT NEXT STEPS:") print(f"1. Review the changes: git log --oneline {target_branch}") print( f"2. Compare commit count: git rev-list --count {source_branch} vs git rev-list --count {target_branch}" ) print("3. If satisfied, you can:") print(f" - Push the new branch: git push origin {target_branch}") print(" - Set it as default branch on GitHub/GitLab") print(f" - Update local checkout: git checkout {target_branch}") print("") print(f"The original branch '{source_branch}' remains unchanged.") if __name__ == "__main__": main()