
Improve LFS support (#1688)

* Add compat tests for LFS
* Add porcelain for LFS
* Integrate with filters
Jelmer Vernooij 1 month ago
parent
commit
815b59c178
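The new porcelain entry points added by this change can be driven straight from Python. A minimal sketch (assuming the functions default to the repository in the current working directory, as the CLI wrapper further down suggests; the "*.bin" pattern is only an example):

    from dulwich import porcelain

    porcelain.lfs_init()                     # prepare LFS storage for the repo
    porcelain.lfs_track(patterns=["*.bin"])  # start tracking a pattern
    for path, oid, size in porcelain.lfs_ls_files():
        print(oid[:12], path, size)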

+ 15 - 11
dulwich/attrs.py

@@ -345,28 +345,32 @@ class GitAttributes:
         """
         """
         # Find existing pattern
         # Find existing pattern
         pattern_obj = None
         pattern_obj = None
-        pattern_index = None
+        attrs_dict: Optional[dict[bytes, AttributeValue]] = None
+        pattern_index = -1
+
         for i, (p, attrs) in enumerate(self._patterns):
         for i, (p, attrs) in enumerate(self._patterns):
             if p.pattern == pattern:
             if p.pattern == pattern:
                 pattern_obj = p
                 pattern_obj = p
+                # Convert to mutable dict
+                attrs_dict = dict(attrs)
                 pattern_index = i
                 pattern_index = i
                 break
                 break
 
 
         if pattern_obj is None:
         if pattern_obj is None:
             # Create new pattern
             # Create new pattern
             pattern_obj = Pattern(pattern)
             pattern_obj = Pattern(pattern)
-            attrs_dict: dict[bytes, AttributeValue] = {name: value}
+            attrs_dict = {name: value}
             self._patterns.append((pattern_obj, attrs_dict))
             self._patterns.append((pattern_obj, attrs_dict))
         else:
         else:
-            # Update existing pattern
-            # Create a new dict with updated attributes
-            assert (
-                pattern_index is not None
-            )  # pattern_index is set when pattern_obj is found
-            old_attrs = self._patterns[pattern_index][1]
-            new_attrs = dict(old_attrs)
-            new_attrs[name] = value
-            self._patterns[pattern_index] = (pattern_obj, new_attrs)
+            # Update the existing pattern in the list
+            assert pattern_index >= 0
+            assert attrs_dict is not None
+            self._patterns[pattern_index] = (pattern_obj, attrs_dict)
+
+        # Update the attribute
+        if attrs_dict is None:
+            raise AssertionError("attrs_dict should not be None at this point")
+        attrs_dict[name] = value
 
 
     def remove_pattern(self, pattern: bytes) -> None:
     def remove_pattern(self, pattern: bytes) -> None:
         """Remove all attributes for a pattern.
         """Remove all attributes for a pattern.

+ 175 - 0
dulwich/cli.py

@@ -1969,6 +1969,180 @@ class cmd_filter_branch(Command):
                 return 1
 
 
+class cmd_lfs(Command):
+    """Git LFS management commands."""
+
+    def run(self, argv) -> None:
+        parser = argparse.ArgumentParser(prog="dulwich lfs")
+        subparsers = parser.add_subparsers(dest="subcommand", help="LFS subcommands")
+
+        # lfs init
+        subparsers.add_parser("init", help="Initialize Git LFS")
+
+        # lfs track
+        parser_track = subparsers.add_parser(
+            "track", help="Track file patterns with LFS"
+        )
+        parser_track.add_argument("patterns", nargs="*", help="File patterns to track")
+
+        # lfs untrack
+        parser_untrack = subparsers.add_parser(
+            "untrack", help="Untrack file patterns from LFS"
+        )
+        parser_untrack.add_argument(
+            "patterns", nargs="+", help="File patterns to untrack"
+        )
+
+        # lfs ls-files
+        parser_ls = subparsers.add_parser("ls-files", help="List LFS files")
+        parser_ls.add_argument("--ref", help="Git ref to check (defaults to HEAD)")
+
+        # lfs migrate
+        parser_migrate = subparsers.add_parser("migrate", help="Migrate files to LFS")
+        parser_migrate.add_argument("--include", nargs="+", help="Patterns to include")
+        parser_migrate.add_argument("--exclude", nargs="+", help="Patterns to exclude")
+        parser_migrate.add_argument(
+            "--everything", action="store_true", help="Migrate all files above 100MB"
+        )
+
+        # lfs pointer
+        parser_pointer = subparsers.add_parser("pointer", help="Check LFS pointers")
+        parser_pointer.add_argument(
+            "--check", nargs="*", dest="paths", help="Check if files are LFS pointers"
+        )
+
+        # lfs clean
+        parser_clean = subparsers.add_parser("clean", help="Clean file to LFS pointer")
+        parser_clean.add_argument("path", help="File path to clean")
+
+        # lfs smudge
+        parser_smudge = subparsers.add_parser(
+            "smudge", help="Smudge LFS pointer to content"
+        )
+        parser_smudge.add_argument(
+            "--stdin", action="store_true", help="Read pointer from stdin"
+        )
+
+        # lfs fetch
+        parser_fetch = subparsers.add_parser(
+            "fetch", help="Fetch LFS objects from remote"
+        )
+        parser_fetch.add_argument(
+            "--remote", default="origin", help="Remote to fetch from"
+        )
+        parser_fetch.add_argument("refs", nargs="*", help="Specific refs to fetch")
+
+        # lfs pull
+        parser_pull = subparsers.add_parser(
+            "pull", help="Pull LFS objects for current checkout"
+        )
+        parser_pull.add_argument(
+            "--remote", default="origin", help="Remote to pull from"
+        )
+
+        # lfs push
+        parser_push = subparsers.add_parser("push", help="Push LFS objects to remote")
+        parser_push.add_argument("--remote", default="origin", help="Remote to push to")
+        parser_push.add_argument("refs", nargs="*", help="Specific refs to push")
+
+        # lfs status
+        subparsers.add_parser("status", help="Show status of LFS files")
+
+        args = parser.parse_args(argv)
+
+        if args.subcommand == "init":
+            porcelain.lfs_init()
+            print("Git LFS initialized.")
+
+        elif args.subcommand == "track":
+            if args.patterns:
+                tracked = porcelain.lfs_track(patterns=args.patterns)
+                print("Tracking patterns:")
+            else:
+                tracked = porcelain.lfs_track()
+                print("Currently tracked patterns:")
+            for pattern in tracked:
+                print(f"  {pattern}")
+
+        elif args.subcommand == "untrack":
+            tracked = porcelain.lfs_untrack(patterns=args.patterns)
+            print("Remaining tracked patterns:")
+            for pattern in tracked:
+                print(f"  {pattern}")
+
+        elif args.subcommand == "ls-files":
+            files = porcelain.lfs_ls_files(ref=args.ref)
+            for path, oid, size in files:
+                print(f"{oid[:12]} * {path} ({format_bytes(size)})")
+
+        elif args.subcommand == "migrate":
+            count = porcelain.lfs_migrate(
+                include=args.include, exclude=args.exclude, everything=args.everything
+            )
+            print(f"Migrated {count} file(s) to Git LFS.")
+
+        elif args.subcommand == "pointer":
+            if args.paths is not None:
+                results = porcelain.lfs_pointer_check(paths=args.paths or None)
+                for path, pointer in results.items():
+                    if pointer:
+                        print(
+                            f"{path}: LFS pointer (oid: {pointer.oid[:12]}, size: {format_bytes(pointer.size)})"
+                        )
+                    else:
+                        print(f"{path}: Not an LFS pointer")
+
+        elif args.subcommand == "clean":
+            pointer = porcelain.lfs_clean(path=args.path)
+            sys.stdout.buffer.write(pointer)
+
+        elif args.subcommand == "smudge":
+            if args.stdin:
+                pointer_content = sys.stdin.buffer.read()
+                content = porcelain.lfs_smudge(pointer_content=pointer_content)
+                sys.stdout.buffer.write(content)
+            else:
+                print("Error: --stdin required for smudge command")
+                sys.exit(1)
+
+        elif args.subcommand == "fetch":
+            refs = args.refs or None
+            count = porcelain.lfs_fetch(remote=args.remote, refs=refs)
+            print(f"Fetched {count} LFS object(s).")
+
+        elif args.subcommand == "pull":
+            count = porcelain.lfs_pull(remote=args.remote)
+            print(f"Pulled {count} LFS object(s).")
+
+        elif args.subcommand == "push":
+            refs = args.refs or None
+            count = porcelain.lfs_push(remote=args.remote, refs=refs)
+            print(f"Pushed {count} LFS object(s).")
+
+        elif args.subcommand == "status":
+            status = porcelain.lfs_status()
+
+            if status["tracked"]:
+                print(f"LFS tracked files: {len(status['tracked'])}")
+
+            if status["missing"]:
+                print("\nMissing LFS objects:")
+                for path in status["missing"]:
+                    print(f"  {path}")
+
+            if status["not_staged"]:
+                print("\nModified LFS files not staged:")
+                for path in status["not_staged"]:
+                    print(f"  {path}")
+
+            if not any(status.values()):
+                print("No LFS files found.")
+
+        else:
+            parser.print_help()
+            sys.exit(1)
+
+
 class cmd_help(Command):
     def run(self, args) -> None:
         parser = argparse.ArgumentParser()
@@ -2077,6 +2251,7 @@ commands = {
     "gc": cmd_gc,
     "gc": cmd_gc,
     "help": cmd_help,
     "help": cmd_help,
     "init": cmd_init,
     "init": cmd_init,
+    "lfs": cmd_lfs,
     "log": cmd_log,
     "log": cmd_log,
     "ls-files": cmd_ls_files,
     "ls-files": cmd_ls_files,
     "ls-remote": cmd_ls_remote,
     "ls-remote": cmd_ls_remote,

+ 120 - 23
dulwich/filters.py

@@ -22,10 +22,9 @@
 """Implementation of Git filter drivers (clean/smudge filters)."""
 """Implementation of Git filter drivers (clean/smudge filters)."""
 
 
 import subprocess
 import subprocess
-from collections.abc import Mapping
 from typing import TYPE_CHECKING, Callable, Optional, Protocol
 from typing import TYPE_CHECKING, Callable, Optional, Protocol
 
 
-from .attrs import AttributeValue, Pattern, match_path
+from .attrs import GitAttributes
 from .objects import Blob
 from .objects import Blob
 
 
 if TYPE_CHECKING:
 if TYPE_CHECKING:
@@ -93,6 +92,10 @@ class FilterRegistry:
 
 
         # Register built-in filter factories
         # Register built-in filter factories
         self.register_factory("lfs", self._create_lfs_filter)
         self.register_factory("lfs", self._create_lfs_filter)
+        self.register_factory("text", self._create_text_filter)
+
+        # Auto-register line ending filter if autocrlf is enabled
+        self._setup_line_ending_filter()
 
 
     def register_factory(
     def register_factory(
         self, name: str, factory: Callable[["FilterRegistry"], FilterDriver]
         self, name: str, factory: Callable[["FilterRegistry"], FilterDriver]
@@ -112,9 +115,9 @@ class FilterRegistry:
 
 
         # Try to create from factory
         # Try to create from factory
         if name in self._factories:
         if name in self._factories:
-            driver = self._factories[name](self)
-            self._drivers[name] = driver
-            return driver
+            factory_driver = self._factories[name](self)
+            self._drivers[name] = factory_driver
+            return factory_driver
 
 
         # Try to create from config
         # Try to create from config
         if self.config is not None:
         if self.config is not None:
@@ -135,21 +138,21 @@ class FilterRegistry:
 
 
         # Get clean command
         # Get clean command
         try:
         try:
-            clean_value = self.config.get(("filter", name), "clean")
-            if isinstance(clean_value, bytes):
-                clean_cmd = clean_value.decode("utf-8")
+            clean_cmd_raw = self.config.get(("filter", name), "clean")
+            if isinstance(clean_cmd_raw, bytes):
+                clean_cmd = clean_cmd_raw.decode("utf-8")
             else:
             else:
-                clean_cmd = clean_value
+                clean_cmd = clean_cmd_raw
         except KeyError:
         except KeyError:
             pass
             pass
 
 
         # Get smudge command
         # Get smudge command
         try:
         try:
-            smudge_value = self.config.get(("filter", name), "smudge")
-            if isinstance(smudge_value, bytes):
-                smudge_cmd = smudge_value.decode("utf-8")
+            smudge_cmd_raw = self.config.get(("filter", name), "smudge")
+            if isinstance(smudge_cmd_raw, bytes):
+                smudge_cmd = smudge_cmd_raw.decode("utf-8")
             else:
             else:
-                smudge_cmd = smudge_value
+                smudge_cmd = smudge_cmd_raw
         except KeyError:
         except KeyError:
             pass
             pass
 
 
@@ -174,30 +177,99 @@ class FilterRegistry:
 
 
         return LFSFilterDriver(lfs_store)
         return LFSFilterDriver(lfs_store)
 
 
+    def _create_text_filter(self, registry: "FilterRegistry") -> FilterDriver:
+        """Create text filter driver for line ending conversion.
+
+        This filter is used when files have the 'text' attribute set explicitly.
+        It always normalizes line endings on checkin (CRLF -> LF).
+        """
+        from .line_ending import (
+            LineEndingFilter,
+            convert_crlf_to_lf,
+            get_smudge_filter,
+        )
+
+        if self.config is None:
+            # Default text filter: always normalize on checkin
+            return LineEndingFilter(
+                clean_conversion=convert_crlf_to_lf,
+                smudge_conversion=None,
+                binary_detection=True,
+            )
+
+        # Get core.eol and core.autocrlf settings for smudge behavior
+        try:
+            core_eol_raw = self.config.get("core", "eol")
+            core_eol: str = (
+                core_eol_raw.decode("ascii")
+                if isinstance(core_eol_raw, bytes)
+                else core_eol_raw
+            )
+        except KeyError:
+            core_eol = "native"
+
+        # Parse autocrlf as bytes (can be b"true", b"input", or b"false")
+        try:
+            autocrlf_raw = self.config.get("core", "autocrlf")
+            autocrlf: bytes = (
+                autocrlf_raw.lower()
+                if isinstance(autocrlf_raw, bytes)
+                else str(autocrlf_raw).lower().encode("ascii")
+            )
+        except KeyError:
+            autocrlf = b"false"
+
+        # For explicit text attribute:
+        # - Always normalize to LF on checkin (clean)
+        # - Smudge behavior depends on core.eol and core.autocrlf
+        smudge_filter = get_smudge_filter(core_eol, autocrlf)
+        clean_filter = convert_crlf_to_lf
+
+        return LineEndingFilter(
+            clean_conversion=clean_filter,
+            smudge_conversion=smudge_filter,
+            binary_detection=True,
+        )
+
+    def _setup_line_ending_filter(self) -> None:
+        """Automatically register line ending filter if configured."""
+        if self.config is None:
+            return
+
+        # Parse autocrlf as bytes
+        try:
+            autocrlf_raw = self.config.get("core", "autocrlf")
+            autocrlf: bytes = (
+                autocrlf_raw.lower()
+                if isinstance(autocrlf_raw, bytes)
+                else str(autocrlf_raw).lower().encode("ascii")
+            )
+        except KeyError:
+            return
+
+        # If autocrlf is enabled, register the text filter
+        if autocrlf in (b"true", b"input"):
+            # Pre-create the text filter so it's available
+            self.get_driver("text")
+
 
 def get_filter_for_path(
     path: bytes,
-    gitattributes: dict[bytes, dict[bytes, AttributeValue]],
+    gitattributes: "GitAttributes",
     filter_registry: FilterRegistry,
 ) -> Optional[FilterDriver]:
     """Get the appropriate filter driver for a given path.
 
     Args:
         path: Path to check
-        gitattributes: Parsed gitattributes (pattern -> attributes mapping)
+        gitattributes: GitAttributes object with parsed patterns
         filter_registry: Registry of filter drivers
 
     Returns:
         FilterDriver instance or None
     """
-    # Convert gitattributes dict to list of (Pattern, attrs) tuples
-    patterns: list[tuple[Pattern, Mapping[bytes, AttributeValue]]] = []
-    for pattern_bytes, attrs in gitattributes.items():
-        pattern = Pattern(pattern_bytes)
-        patterns.append((pattern, attrs))
-
     # Get all attributes for this path
-    attributes = match_path(patterns, path)
+    attributes = gitattributes.match_path(path)
 
     # Check if there's a filter attribute
     filter_name = attributes.get(b"filter")
@@ -209,6 +281,31 @@ def get_filter_for_path(
             return filter_registry.get_driver(filter_name_str)
         return None
 
+    # Check for text attribute
+    text_attr = attributes.get(b"text")
+    if text_attr is True:
+        # Use the text filter for line ending conversion
+        return filter_registry.get_driver("text")
+    elif text_attr is False:
+        # -text means binary, no conversion
+        return None
+
+    # If no explicit text attribute, check if autocrlf is enabled
+    # When autocrlf is true/input, files are treated as text by default
+    if filter_registry.config is not None:
+        try:
+            autocrlf_raw = filter_registry.config.get("core", "autocrlf")
+            autocrlf: bytes = (
+                autocrlf_raw.lower()
+                if isinstance(autocrlf_raw, bytes)
+                else str(autocrlf_raw).lower().encode("ascii")
+            )
+            if autocrlf in (b"true", b"input"):
+                # Use text filter for files without explicit attributes
+                return filter_registry.get_driver("text")
+        except KeyError:
+            pass
+
     return None
 
 
@@ -221,7 +318,7 @@ class FilterBlobNormalizer:
     def __init__(
         self,
         config_stack: Optional["StackedConfig"],
-        gitattributes: dict[bytes, dict[bytes, AttributeValue]],
+        gitattributes: GitAttributes,
         filter_registry: Optional[FilterRegistry] = None,
         repo=None,
     ) -> None:
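With the signature change above, callers now hand get_filter_for_path a GitAttributes object rather than a raw dict. A rough sketch of the lookup, assuming FilterRegistry can be constructed without a config stack (its factories only consult the config when one is present):

    from dulwich.attrs import GitAttributes, Pattern
    from dulwich.filters import FilterRegistry, get_filter_for_path

    # Equivalent of a .gitattributes line: "*.txt text"
    gitattributes = GitAttributes([(Pattern(b"*.txt"), {b"text": True})])
    registry = FilterRegistry()  # assumption: config argument defaults to None
    driver = get_filter_for_path(b"docs/readme.txt", gitattributes, registry)
    if driver is not None:
        print(driver.clean(b"a\r\nb\r\n"))  # CRLF normalized to LF on checkin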

+ 278 - 4
dulwich/lfs.py

@@ -20,15 +20,57 @@
 #
 
 import hashlib
+import json
 import os
 import tempfile
 from collections.abc import Iterable
-from typing import TYPE_CHECKING, BinaryIO, Optional
+from dataclasses import dataclass
+from typing import TYPE_CHECKING, BinaryIO, Optional, Union
+from urllib.error import HTTPError
+from urllib.parse import urljoin
+from urllib.request import Request, urlopen
 
 if TYPE_CHECKING:
     from .repo import Repo
 
 
+@dataclass
+class LFSAction:
+    """LFS action structure."""
+
+    href: str
+    header: Optional[dict[str, str]] = None
+    expires_at: Optional[str] = None
+
+
+@dataclass
+class LFSErrorInfo:
+    """LFS error structure."""
+
+    code: int
+    message: str
+
+
+@dataclass
+class LFSBatchObject:
+    """LFS batch object structure."""
+
+    oid: str
+    size: int
+    authenticated: Optional[bool] = None
+    actions: Optional[dict[str, LFSAction]] = None
+    error: Optional[LFSErrorInfo] = None
+
+
+@dataclass
+class LFSBatchResponse:
+    """LFS batch response structure."""
+
+    transfer: str
+    objects: list[LFSBatchObject]
+    hash_algo: Optional[str] = None
+
+
 class LFSStore:
     """Stores objects on disk, indexed by SHA256."""
 
@@ -39,8 +81,12 @@ class LFSStore:
     def create(cls, lfs_dir: str) -> "LFSStore":
     def create(cls, lfs_dir: str) -> "LFSStore":
         if not os.path.isdir(lfs_dir):
         if not os.path.isdir(lfs_dir):
             os.mkdir(lfs_dir)
             os.mkdir(lfs_dir)
-        os.mkdir(os.path.join(lfs_dir, "tmp"))
-        os.mkdir(os.path.join(lfs_dir, "objects"))
+        tmp_dir = os.path.join(lfs_dir, "tmp")
+        if not os.path.isdir(tmp_dir):
+            os.mkdir(tmp_dir)
+        objects_dir = os.path.join(lfs_dir, "objects")
+        if not os.path.isdir(objects_dir):
+            os.mkdir(objects_dir)
         return cls(lfs_dir)
         return cls(lfs_dir)
 
 
     @classmethod
     @classmethod
@@ -76,7 +122,12 @@ class LFSStore:
         path = self._sha_path(sha.hexdigest())
         if not os.path.exists(os.path.dirname(path)):
             os.makedirs(os.path.dirname(path))
-        os.rename(tmppath, path)
+
+        # Handle concurrent writes - if file already exists, just remove temp file
+        if os.path.exists(path):
+            os.remove(tmppath)
+        else:
+            os.rename(tmppath, path)
         return sha.hexdigest()
 
 
@@ -116,6 +167,9 @@ class LFSPointer:
             elif line.startswith("size "):
             elif line.startswith("size "):
                 try:
                 try:
                     size = int(line[5:].strip())
                     size = int(line[5:].strip())
+                    # Size must be non-negative
+                    if size < 0:
+                        return None
                 except ValueError:
                 except ValueError:
                     return None
                     return None
 
 
@@ -183,3 +237,223 @@ class LFSFilterDriver:
             # Object not found in LFS store, return pointer as-is
             # This matches Git LFS behavior when object is missing
             return data
+
+
+class LFSClient:
+    """LFS client for network operations."""
+
+    def __init__(self, url: str, auth: Optional[tuple[str, str]] = None) -> None:
+        """Initialize LFS client.
+
+        Args:
+            url: LFS server URL
+            auth: Optional (username, password) tuple for authentication
+        """
+        self.url = url.rstrip("/")
+        self.auth = auth
+
+    def _make_request(
+        self,
+        method: str,
+        path: str,
+        data: Optional[bytes] = None,
+        headers: Optional[dict[str, str]] = None,
+    ) -> bytes:
+        """Make an HTTP request to the LFS server."""
+        url = urljoin(self.url, path)
+        req_headers = {
+            "Accept": "application/vnd.git-lfs+json",
+            "Content-Type": "application/vnd.git-lfs+json",
+        }
+        if headers:
+            req_headers.update(headers)
+
+        req = Request(url, data=data, headers=req_headers, method=method)
+
+        if self.auth:
+            import base64
+
+            auth_str = f"{self.auth[0]}:{self.auth[1]}"
+            b64_auth = base64.b64encode(auth_str.encode()).decode("ascii")
+            req.add_header("Authorization", f"Basic {b64_auth}")
+
+        try:
+            with urlopen(req) as response:
+                return response.read()
+        except HTTPError as e:
+            error_body = e.read().decode("utf-8", errors="ignore")
+            raise LFSError(f"LFS server error {e.code}: {error_body}")
+
+    def batch(
+        self,
+        operation: str,
+        objects: list[dict[str, Union[str, int]]],
+        ref: Optional[str] = None,
+    ) -> LFSBatchResponse:
+        """Perform batch operation to get transfer URLs.
+
+        Args:
+            operation: "download" or "upload"
+            objects: List of {"oid": str, "size": int} dicts
+            ref: Optional ref name
+
+        Returns:
+            Batch response from server
+        """
+        data: dict[
+            str, Union[str, list[str], list[dict[str, Union[str, int]]], dict[str, str]]
+        ] = {
+            "operation": operation,
+            "transfers": ["basic"],
+            "objects": objects,
+        }
+        if ref:
+            data["ref"] = {"name": ref}
+
+        response = self._make_request(
+            "POST", "/objects/batch", json.dumps(data).encode("utf-8")
+        )
+        response_data = json.loads(response)
+        return self._parse_batch_response(response_data)
+
+    def _parse_batch_response(self, data: dict) -> LFSBatchResponse:
+        """Parse JSON response into LFSBatchResponse dataclass."""
+        objects = []
+        for obj_data in data.get("objects", []):
+            actions = None
+            if "actions" in obj_data:
+                actions = {}
+                for action_name, action_data in obj_data["actions"].items():
+                    actions[action_name] = LFSAction(
+                        href=action_data["href"],
+                        header=action_data.get("header"),
+                        expires_at=action_data.get("expires_at"),
+                    )
+
+            error = None
+            if "error" in obj_data:
+                error = LFSErrorInfo(
+                    code=obj_data["error"]["code"], message=obj_data["error"]["message"]
+                )
+
+            batch_obj = LFSBatchObject(
+                oid=obj_data["oid"],
+                size=obj_data["size"],
+                authenticated=obj_data.get("authenticated"),
+                actions=actions,
+                error=error,
+            )
+            objects.append(batch_obj)
+
+        return LFSBatchResponse(
+            transfer=data.get("transfer", "basic"),
+            objects=objects,
+            hash_algo=data.get("hash_algo"),
+        )
+
+    def download(self, oid: str, size: int, ref: Optional[str] = None) -> bytes:
+        """Download an LFS object.
+
+        Args:
+            oid: Object ID (SHA256)
+            size: Expected size
+            ref: Optional ref name
+
+        Returns:
+            Object content
+        """
+        # Get download URL via batch API
+        batch_resp = self.batch("download", [{"oid": oid, "size": size}], ref)
+
+        if not batch_resp.objects:
+            raise LFSError(f"No objects returned for {oid}")
+
+        obj = batch_resp.objects[0]
+        if obj.error:
+            raise LFSError(f"Server error for {oid}: {obj.error.message}")
+
+        if not obj.actions or "download" not in obj.actions:
+            raise LFSError(f"No download actions for {oid}")
+
+        download_action = obj.actions["download"]
+        download_url = download_action.href
+
+        # Download the object
+        req = Request(download_url)
+        if download_action.header:
+            for name, value in download_action.header.items():
+                req.add_header(name, value)
+
+        with urlopen(req) as response:
+            content = response.read()
+
+        # Verify size
+        if len(content) != size:
+            raise LFSError(f"Downloaded size {len(content)} != expected {size}")
+
+        # Verify SHA256
+        actual_oid = hashlib.sha256(content).hexdigest()
+        if actual_oid != oid:
+            raise LFSError(f"Downloaded OID {actual_oid} != expected {oid}")
+
+        return content
+
+    def upload(
+        self, oid: str, size: int, content: bytes, ref: Optional[str] = None
+    ) -> None:
+        """Upload an LFS object.
+
+        Args:
+            oid: Object ID (SHA256)
+            size: Object size
+            content: Object content
+            ref: Optional ref name
+        """
+        # Get upload URL via batch API
+        batch_resp = self.batch("upload", [{"oid": oid, "size": size}], ref)
+
+        if not batch_resp.objects:
+            raise LFSError(f"No objects returned for {oid}")
+
+        obj = batch_resp.objects[0]
+        if obj.error:
+            raise LFSError(f"Server error for {oid}: {obj.error.message}")
+
+        # If no actions, object already exists
+        if not obj.actions:
+            return
+
+        if "upload" not in obj.actions:
+            raise LFSError(f"No upload action for {oid}")
+
+        upload_action = obj.actions["upload"]
+        upload_url = upload_action.href
+
+        # Upload the object
+        req = Request(upload_url, data=content, method="PUT")
+        if upload_action.header:
+            for name, value in upload_action.header.items():
+                req.add_header(name, value)
+
+        with urlopen(req) as response:
+            if response.status >= 400:
+                raise LFSError(f"Upload failed with status {response.status}")
+
+        # Verify if needed
+        if obj.actions and "verify" in obj.actions:
+            verify_action = obj.actions["verify"]
+            verify_data = json.dumps({"oid": oid, "size": size}).encode("utf-8")
+
+            req = Request(verify_action.href, data=verify_data, method="POST")
+            req.add_header("Content-Type", "application/vnd.git-lfs+json")
+            if verify_action.header:
+                for name, value in verify_action.header.items():
+                    req.add_header(name, value)
+
+            with urlopen(req) as response:
+                if response.status >= 400:
+                    raise LFSError(f"Verification failed with status {response.status}")
+
+
+class LFSError(Exception):
+    """LFS-specific error."""

+ 276 - 0
dulwich/lfs_server.py

@@ -0,0 +1,276 @@
+# lfs_server.py -- Simple Git LFS server implementation
+# Copyright (C) 2024 Jelmer Vernooij <jelmer@jelmer.uk>
+#
+# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
+# General Public License as public by the Free Software Foundation; version 2.0
+# or (at your option) any later version. You can redistribute it and/or
+# modify it under the terms of either of these two licenses.
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# You should have received a copy of the licenses; if not, see
+# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
+# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
+# License, Version 2.0.
+#
+
+"""Simple Git LFS server implementation for testing."""
+
+import hashlib
+import json
+import tempfile
+from http.server import BaseHTTPRequestHandler, HTTPServer
+from typing import Optional
+
+from .lfs import LFSStore
+
+
+class LFSRequestHandler(BaseHTTPRequestHandler):
+    """HTTP request handler for LFS operations."""
+
+    server: "LFSServer"  # Type annotation for the server attribute
+
+    def send_json_response(self, status_code: int, data: dict) -> None:
+        """Send a JSON response."""
+        response = json.dumps(data).encode("utf-8")
+        self.send_response(status_code)
+        self.send_header("Content-Type", "application/vnd.git-lfs+json")
+        self.send_header("Content-Length", str(len(response)))
+        self.end_headers()
+        self.wfile.write(response)
+
+    def do_POST(self) -> None:
+        """Handle POST requests."""
+        if self.path == "/objects/batch":
+            self.handle_batch()
+        elif self.path.startswith("/objects/") and self.path.endswith("/verify"):
+            self.handle_verify()
+        else:
+            self.send_error(404, "Not Found")
+
+    def do_PUT(self) -> None:
+        """Handle PUT requests (uploads)."""
+        if self.path.startswith("/objects/"):
+            self.handle_upload()
+        else:
+            self.send_error(404, "Not Found")
+
+    def do_GET(self) -> None:
+        """Handle GET requests (downloads)."""
+        if self.path.startswith("/objects/"):
+            self.handle_download()
+        else:
+            self.send_error(404, "Not Found")
+
+    def handle_batch(self) -> None:
+        """Handle batch API requests."""
+        content_length = int(self.headers["Content-Length"])
+        request_data = self.rfile.read(content_length)
+
+        try:
+            batch_request = json.loads(request_data)
+        except json.JSONDecodeError:
+            self.send_error(400, "Invalid JSON")
+            return
+
+        operation = batch_request.get("operation")
+        objects = batch_request.get("objects", [])
+
+        if operation not in ["download", "upload"]:
+            self.send_error(400, "Invalid operation")
+            return
+
+        response_objects = []
+
+        for obj in objects:
+            oid = obj.get("oid")
+            size = obj.get("size")
+
+            if not oid or size is None:
+                response_objects.append(
+                    {
+                        "oid": oid,
+                        "size": size,
+                        "error": {"code": 400, "message": "Missing oid or size"},
+                    }
+                )
+                continue
+
+            response_obj = {
+                "oid": oid,
+                "size": size,
+            }
+
+            if operation == "download":
+                # Check if object exists
+                if self._object_exists(oid):
+                    response_obj["actions"] = {
+                        "download": {
+                            "href": f"http://{self.headers['Host']}/objects/{oid}",
+                            "header": {"Accept": "application/octet-stream"},
+                        }
+                    }
+                else:
+                    response_obj["error"] = {"code": 404, "message": "Object not found"}
+            else:  # upload
+                response_obj["actions"] = {
+                    "upload": {
+                        "href": f"http://{self.headers['Host']}/objects/{oid}",
+                        "header": {"Content-Type": "application/octet-stream"},
+                    },
+                    "verify": {
+                        "href": f"http://{self.headers['Host']}/objects/{oid}/verify"
+                    },
+                }
+
+            response_objects.append(response_obj)
+
+        self.send_json_response(200, {"objects": response_objects})
+
+    def handle_download(self) -> None:
+        """Handle object download requests."""
+        # Extract OID from path
+        path_parts = self.path.strip("/").split("/")
+        if len(path_parts) != 2:
+            self.send_error(404, "Not Found")
+            return
+
+        oid = path_parts[1]
+
+        try:
+            with self.server.lfs_store.open_object(oid) as f:
+                content = f.read()
+
+            self.send_response(200)
+            self.send_header("Content-Type", "application/octet-stream")
+            self.send_header("Content-Length", str(len(content)))
+            self.end_headers()
+            self.wfile.write(content)
+        except KeyError:
+            self.send_error(404, "Object not found")
+
+    def handle_upload(self) -> None:
+        """Handle object upload requests."""
+        # Extract OID from path
+        path_parts = self.path.strip("/").split("/")
+        if len(path_parts) != 2:
+            self.send_error(404, "Not Found")
+            return
+
+        oid = path_parts[1]
+        content_length = int(self.headers["Content-Length"])
+
+        # Read content in chunks
+        chunks = []
+        remaining = content_length
+        while remaining > 0:
+            chunk_size = min(8192, remaining)
+            chunk = self.rfile.read(chunk_size)
+            if not chunk:
+                break
+            chunks.append(chunk)
+            remaining -= len(chunk)
+
+        # Calculate SHA256
+        content = b"".join(chunks)
+        calculated_oid = hashlib.sha256(content).hexdigest()
+
+        # Verify OID matches
+        if calculated_oid != oid:
+            self.send_error(400, f"OID mismatch: expected {oid}, got {calculated_oid}")
+            return
+
+        # Check if object already exists
+        if not self._object_exists(oid):
+            # Store the object only if it doesn't exist
+            self.server.lfs_store.write_object(chunks)
+
+        self.send_response(200)
+        self.end_headers()
+
+    def handle_verify(self) -> None:
+        """Handle object verification requests."""
+        # Extract OID from path
+        path_parts = self.path.strip("/").split("/")
+        if len(path_parts) != 3 or path_parts[2] != "verify":
+            self.send_error(404, "Not Found")
+            return
+
+        oid = path_parts[1]
+        content_length = int(self.headers.get("Content-Length", 0))
+
+        if content_length > 0:
+            request_data = self.rfile.read(content_length)
+            try:
+                verify_request = json.loads(request_data)
+                # Optionally validate size
+                if "size" in verify_request:
+                    # Could verify size matches stored object
+                    pass
+            except json.JSONDecodeError:
+                pass
+
+        # Check if object exists
+        if self._object_exists(oid):
+            self.send_response(200)
+            self.end_headers()
+        else:
+            self.send_error(404, "Object not found")
+
+    def _object_exists(self, oid: str) -> bool:
+        """Check if an object exists in the store."""
+        try:
+            # Try to open the object - if it exists, close it immediately
+            with self.server.lfs_store.open_object(oid):
+                return True
+        except KeyError:
+            return False
+
+    def log_message(self, format, *args):
+        """Override to suppress request logging during tests."""
+        if self.server.log_requests:
+            super().log_message(format, *args)
+
+
+class LFSServer(HTTPServer):
+    """Simple LFS server for testing."""
+
+    def __init__(self, server_address, lfs_store: LFSStore, log_requests: bool = False):
+        super().__init__(server_address, LFSRequestHandler)
+        self.lfs_store = lfs_store
+        self.log_requests = log_requests
+
+
+def run_lfs_server(
+    host: str = "localhost",
+    port: int = 0,
+    lfs_dir: Optional[str] = None,
+    log_requests: bool = False,
+) -> tuple[LFSServer, str]:
+    """Run an LFS server.
+
+    Args:
+        host: Host to bind to
+        port: Port to bind to (0 for random)
+        lfs_dir: Directory for LFS storage (temp dir if None)
+        log_requests: Whether to log HTTP requests
+
+    Returns:
+        Tuple of (server, url) where url is the base URL for the server
+    """
+    if lfs_dir is None:
+        lfs_dir = tempfile.mkdtemp()
+
+    lfs_store = LFSStore.create(lfs_dir)
+    server = LFSServer((host, port), lfs_store, log_requests)
+
+    # Get the actual port if we used 0
+    actual_port = server.server_address[1]
+    url = f"http://{host}:{actual_port}"
+
+    return server, url
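The test server composes with the client above into a self-contained round trip; a minimal sketch (random port, temporary storage directory, server running in a background thread):

    import threading
    from hashlib import sha256

    from dulwich.lfs import LFSClient
    from dulwich.lfs_server import run_lfs_server

    server, url = run_lfs_server()
    threading.Thread(target=server.serve_forever, daemon=True).start()

    payload = b"hello lfs"
    oid = sha256(payload).hexdigest()
    client = LFSClient(url)
    client.upload(oid, len(payload), payload)
    assert client.download(oid, len(payload)) == payload
    server.shutdown()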

+ 200 - 80
dulwich/line_ending.py

@@ -26,11 +26,11 @@ about how it seems to work.
 The normalization is a two-fold process that happens at two moments:
 
 - When reading a file from the index and to the working directory. For example
-  when doing a ``git clone`` or ``git checkout`` call. We call this process the
-  read filter in this module.
+  when doing a ``git clone`` or ``git checkout`` call. This is called the
+  smudge filter (repository -> working tree).
 - When writing a file to the index from the working directory. For example
-  when doing a ``git add`` call. We call this process the write filter in this
-  module.
+  when doing a ``git add`` call. This is called the clean filter (working tree
+  -> repository).
 
 Note that when checking status (getting unstaged changes), whether or not
 normalization is done on write depends on whether or not the file in the
@@ -108,13 +108,13 @@ attribute defined in ``.gitattributes``; it takes three possible values:
       line-endings in the working directory and convert line-endings to LF
       when writing to the index. When autocrlf is set to true, eol value is
       ignored.
-    - ``input``: Quite similar to the ``true`` value but only force the write
+    - ``input``: Quite similar to the ``true`` value but only applies the clean
       filter, ie line-ending of new files added to the index will get their
       line-endings converted to LF.
     - ``false`` (default): No normalization is done.
 
 ``core.eol`` is the top-level configuration to define the line-ending to use
-when applying the read_filer. It takes three possible values:
+when applying the smudge filter. It takes three possible values:
 
     - ``lf``: When normalization is done, force line-endings to be ``LF`` in the
       working directory.
@@ -143,6 +143,9 @@ if TYPE_CHECKING:
     from .config import StackedConfig
     from .object_store import BaseObjectStore
 
+from . import replace_me
+from .attrs import GitAttributes, Pattern
+from .filters import FilterBlobNormalizer, FilterDriver, FilterRegistry
 from .object_store import iter_tree_contents
 from .objects import Blob, ObjectID
 from .patch import is_binary
@@ -151,6 +154,42 @@ CRLF = b"\r\n"
 LF = b"\n"
 LF = b"\n"
 
 
 
 
+class LineEndingFilter(FilterDriver):
+    """Filter driver for line ending conversion."""
+
+    def __init__(
+        self,
+        clean_conversion: Optional[Callable[[bytes], bytes]] = None,
+        smudge_conversion: Optional[Callable[[bytes], bytes]] = None,
+        binary_detection: bool = True,
+    ):
+        self.clean_conversion = clean_conversion
+        self.smudge_conversion = smudge_conversion
+        self.binary_detection = binary_detection
+
+    def clean(self, data: bytes) -> bytes:
+        """Apply line ending conversion for checkin (working tree -> repository)."""
+        if self.clean_conversion is None:
+            return data
+
+        # Skip binary files if detection is enabled
+        if self.binary_detection and is_binary(data):
+            return data
+
+        return self.clean_conversion(data)
+
+    def smudge(self, data: bytes) -> bytes:
+        """Apply line ending conversion for checkout (repository -> working tree)."""
+        if self.smudge_conversion is None:
+            return data
+
+        # Skip binary files if detection is enabled
+        if self.binary_detection and is_binary(data):
+            return data
+
+        return self.smudge_conversion(data)
+
+
 def convert_crlf_to_lf(text_hunk: bytes) -> bytes:
     """Convert CRLF in text hunk into LF.
 
@@ -181,46 +220,26 @@ def convert_lf_to_crlf(text_hunk: bytes) -> bytes:
     return CRLF.join(cleaned_parts)
 
 
-def get_checkout_filter(
-    core_eol: str, core_autocrlf: Union[bool, str], git_attributes: dict[str, Any]
+def get_smudge_filter(
+    core_eol: str, core_autocrlf: bytes
 ) -> Optional[Callable[[bytes], bytes]]:
-    """Returns the correct checkout filter based on the passed arguments."""
-    # TODO this function should process the git_attributes for the path and if
-    # the text attribute is not defined, fallback on the
-    # get_checkout_filter_autocrlf function with the autocrlf value
-    if isinstance(core_autocrlf, bool):
-        autocrlf_bytes = b"true" if core_autocrlf else b"false"
-    else:
-        autocrlf_bytes = (
-            core_autocrlf.encode("ascii")
-            if isinstance(core_autocrlf, str)
-            else core_autocrlf
-        )
-    return get_checkout_filter_autocrlf(autocrlf_bytes)
+    """Returns the correct smudge filter based on the passed arguments."""
+    # Git attributes handling is done by the filter infrastructure
+    return get_smudge_filter_autocrlf(core_autocrlf)
 
 
-def get_checkin_filter(
-    core_eol: str, core_autocrlf: Union[bool, str], git_attributes: dict[str, Any]
+def get_clean_filter(
+    core_eol: str, core_autocrlf: bytes
 ) -> Optional[Callable[[bytes], bytes]]:
-    """Returns the correct checkin filter based on the passed arguments."""
-    # TODO this function should process the git_attributes for the path and if
-    # the text attribute is not defined, fallback on the
-    # get_checkin_filter_autocrlf function with the autocrlf value
-    if isinstance(core_autocrlf, bool):
-        autocrlf_bytes = b"true" if core_autocrlf else b"false"
-    else:
-        autocrlf_bytes = (
-            core_autocrlf.encode("ascii")
-            if isinstance(core_autocrlf, str)
-            else core_autocrlf
-        )
-    return get_checkin_filter_autocrlf(autocrlf_bytes)
+    """Returns the correct clean filter based on the passed arguments."""
+    # Git attributes handling is done by the filter infrastructure
+    return get_clean_filter_autocrlf(core_autocrlf)
 
 
-def get_checkout_filter_autocrlf(
+def get_smudge_filter_autocrlf(
     core_autocrlf: bytes,
 ) -> Optional[Callable[[bytes], bytes]]:
-    """Returns the correct checkout filter base on autocrlf value.
+    """Returns the correct smudge filter base on autocrlf value.
 
     Args:
       core_autocrlf: The bytes configuration value of core.autocrlf.
@@ -234,10 +253,10 @@ def get_checkout_filter_autocrlf(
     return None
 
 
-def get_checkin_filter_autocrlf(
+def get_clean_filter_autocrlf(
     core_autocrlf: bytes,
 ) -> Optional[Callable[[bytes], bytes]]:
-    """Returns the correct checkin filter base on autocrlf value.
+    """Returns the correct clean filter base on autocrlf value.
 
     Args:
      core_autocrlf: The bytes configuration value of core.autocrlf.
@@ -252,63 +271,162 @@ def get_checkin_filter_autocrlf(
     return None
 
 
-class BlobNormalizer:
+# Backwards compatibility wrappers
+@replace_me(since="0.23.1", remove_in="0.25.0")
+def get_checkout_filter(
+    core_eol: str, core_autocrlf: Union[bool, str], git_attributes: dict[str, Any]
+) -> Optional[Callable[[bytes], bytes]]:
+    """Deprecated: Use get_smudge_filter instead."""
+    # Convert core_autocrlf to bytes for compatibility
+    if isinstance(core_autocrlf, bool):
+        autocrlf_bytes = b"true" if core_autocrlf else b"false"
+    else:
+        autocrlf_bytes = (
+            core_autocrlf.encode("utf-8")
+            if isinstance(core_autocrlf, str)
+            else core_autocrlf
+        )
+    return get_smudge_filter(core_eol, autocrlf_bytes)
+
+
+@replace_me(since="0.23.1", remove_in="0.25.0")
+def get_checkin_filter(
+    core_eol: str, core_autocrlf: Union[bool, str], git_attributes: dict[str, Any]
+) -> Optional[Callable[[bytes], bytes]]:
+    """Deprecated: Use get_clean_filter instead."""
+    # Convert core_autocrlf to bytes for compatibility
+    if isinstance(core_autocrlf, bool):
+        autocrlf_bytes = b"true" if core_autocrlf else b"false"
+    else:
+        autocrlf_bytes = (
+            core_autocrlf.encode("utf-8")
+            if isinstance(core_autocrlf, str)
+            else core_autocrlf
+        )
+    return get_clean_filter(core_eol, autocrlf_bytes)
+
+
+@replace_me(since="0.23.1", remove_in="0.25.0")
+def get_checkout_filter_autocrlf(
+    core_autocrlf: bytes,
+) -> Optional[Callable[[bytes], bytes]]:
+    """Deprecated: Use get_smudge_filter_autocrlf instead."""
+    return get_smudge_filter_autocrlf(core_autocrlf)
+
+
+@replace_me(since="0.23.1", remove_in="0.25.0")
+def get_checkin_filter_autocrlf(
+    core_autocrlf: bytes,
+) -> Optional[Callable[[bytes], bytes]]:
+    """Deprecated: Use get_clean_filter_autocrlf instead."""
+    return get_clean_filter_autocrlf(core_autocrlf)
+
+
+class BlobNormalizer(FilterBlobNormalizer):
     """An object to store computation result of which filter to apply based
     """An object to store computation result of which filter to apply based
     on configuration, gitattributes, path and operation (checkin or checkout).
     on configuration, gitattributes, path and operation (checkin or checkout).
+
+    This class maintains backward compatibility while using the filter infrastructure.
     """
     """
 
 
     def __init__(
     def __init__(
-        self, config_stack: "StackedConfig", gitattributes: dict[str, Any]
+        self,
+        config_stack: "StackedConfig",
+        gitattributes: dict[str, Any],
+        core_eol: str = "native",
+        autocrlf: bytes = b"false",
     ) -> None:
-        self.config_stack = config_stack
-        self.gitattributes = gitattributes
-
-        # Compute which filters we needs based on parameters
-        try:
-            core_eol_raw = config_stack.get("core", "eol")
-            core_eol: str = (
-                core_eol_raw.decode("ascii")
-                if isinstance(core_eol_raw, bytes)
-                else core_eol_raw
-            )
-        except KeyError:
-            core_eol = "native"
-
-        try:
-            core_autocrlf_raw = config_stack.get("core", "autocrlf")
-            if isinstance(core_autocrlf_raw, bytes):
-                core_autocrlf: Union[bool, str] = core_autocrlf_raw.decode(
-                    "ascii"
-                ).lower()
+        # Set up a filter registry with line ending filters
+        filter_registry = FilterRegistry(config_stack)
+
+        # Create line ending filter if needed
+        smudge_filter = get_smudge_filter(core_eol, autocrlf)
+        clean_filter = get_clean_filter(core_eol, autocrlf)
+
+        # Always register a text filter that can be used by gitattributes
+        # Even if autocrlf is false, gitattributes text=true should work
+        line_ending_filter = LineEndingFilter(
+            clean_conversion=clean_filter or convert_crlf_to_lf,
+            smudge_conversion=smudge_filter or convert_lf_to_crlf,
+            binary_detection=True,
+        )
+        filter_registry.register_driver("text", line_ending_filter)
+
+        # Convert dict gitattributes to GitAttributes object for parent class
+        git_attrs_patterns = []
+        for pattern_str, attrs in gitattributes.items():
+            if isinstance(pattern_str, str):
+                pattern_bytes = pattern_str.encode("utf-8")
             else:
-                core_autocrlf = core_autocrlf_raw.lower()
-        except KeyError:
-            core_autocrlf = False
+                pattern_bytes = pattern_str
+            pattern = Pattern(pattern_bytes)
+            git_attrs_patterns.append((pattern, attrs))
 
-        self.fallback_read_filter = get_checkout_filter(
-            core_eol, core_autocrlf, self.gitattributes
-        )
-        self.fallback_write_filter = get_checkin_filter(
-            core_eol, core_autocrlf, self.gitattributes
-        )
+        git_attributes = GitAttributes(git_attrs_patterns)
+
+        # Initialize parent class with gitattributes
+        # The filter infrastructure will handle gitattributes processing
+        super().__init__(config_stack, git_attributes, filter_registry)
+
+        # Store original filters for backward compatibility
+        self.fallback_read_filter = smudge_filter
+        self.fallback_write_filter = clean_filter
 
     def checkin_normalize(self, blob: Blob, tree_path: bytes) -> Blob:
         """Normalize a blob during a checkin operation."""
-        if self.fallback_write_filter is not None:
-            return normalize_blob(
-                blob, self.fallback_write_filter, binary_detection=True
+        # First try to get filter from gitattributes (handled by parent)
+        result = super().checkin_normalize(blob, tree_path)
+
+        # Check if gitattributes explicitly disabled text conversion
+        attrs = self.gitattributes.match_path(tree_path)
+        if b"text" in attrs and attrs[b"text"] is False:
+            # Explicitly marked as binary, no conversion
+            return blob
+
+        # If no filter was applied via gitattributes and we have a fallback filter
+        # (autocrlf is enabled), apply it to all files
+        if result is blob and self.fallback_write_filter is not None:
+            # Apply the clean filter with binary detection
+            line_ending_filter = LineEndingFilter(
+                clean_conversion=self.fallback_write_filter,
+                smudge_conversion=None,
+                binary_detection=True,
             )
+            filtered_data = line_ending_filter.clean(blob.data)
+            if filtered_data != blob.data:
+                new_blob = Blob()
+                new_blob.data = filtered_data
+                return new_blob
 
-        return blob
+        return result
 
     def checkout_normalize(self, blob: Blob, tree_path: bytes) -> Blob:
         """Normalize a blob during a checkout operation."""
-        if self.fallback_read_filter is not None:
-            return normalize_blob(
-                blob, self.fallback_read_filter, binary_detection=True
+        # First try to get filter from gitattributes (handled by parent)
+        result = super().checkout_normalize(blob, tree_path)
+
+        # Check if gitattributes explicitly disabled text conversion
+        attrs = self.gitattributes.match_path(tree_path)
+        if b"text" in attrs and attrs[b"text"] is False:
+            # Explicitly marked as binary, no conversion
+            return blob
+
+        # If no filter was applied via gitattributes and we have a fallback filter
+        # (autocrlf is enabled), apply it to all files
+        if result is blob and self.fallback_read_filter is not None:
+            # Apply the smudge filter with binary detection
+            line_ending_filter = LineEndingFilter(
+                clean_conversion=None,
+                smudge_conversion=self.fallback_read_filter,
+                binary_detection=True,
             )
             )
+            filtered_data = line_ending_filter.smudge(blob.data)
+            if filtered_data != blob.data:
+                new_blob = Blob()
+                new_blob.data = filtered_data
+                return new_blob
 
 
-        return blob
+        return result
 
 
 
 
 def normalize_blob(
 def normalize_blob(
@@ -344,8 +462,10 @@ class TreeBlobNormalizer(BlobNormalizer):
         git_attributes: dict[str, Any],
         git_attributes: dict[str, Any],
         object_store: "BaseObjectStore",
         object_store: "BaseObjectStore",
         tree: Optional[ObjectID] = None,
         tree: Optional[ObjectID] = None,
+        core_eol: str = "native",
+        autocrlf: bytes = b"false",
     ) -> None:
     ) -> None:
-        super().__init__(config_stack, git_attributes)
+        super().__init__(config_stack, git_attributes, core_eol, autocrlf)
         if tree:
         if tree:
             self.existing_paths = {
             self.existing_paths = {
                 name for name, _, _ in iter_tree_contents(object_store, tree)
                 name for name, _, _ in iter_tree_contents(object_store, tree)

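A minimal sketch of how the checkin/checkout normalization entry points above are reached in practice (the repository path is illustrative; whether any conversion actually happens depends on core.autocrlf, core.eol and .gitattributes):

    from dulwich.objects import Blob
    from dulwich.repo import Repo

    repo = Repo("/path/to/repo")
    normalizer = repo.get_blob_normalizer()  # filter-aware normalizer (see repo.py changes below)

    blob = Blob.from_string(b"line one\r\nline two\r\n")
    staged = normalizer.checkin_normalize(blob, b"docs/notes.txt")       # clean direction
    restored = normalizer.checkout_normalize(staged, b"docs/notes.txt")  # smudge direction
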
+ 706 - 30
dulwich/porcelain.py

@@ -107,14 +107,22 @@ from .errors import SendPackError
 from .graph import can_fast_forward
 from .graph import can_fast_forward
 from .ignore import IgnoreFilterManager
 from .ignore import IgnoreFilterManager
 from .index import (
 from .index import (
+    IndexEntry,
     _fs_to_tree_path,
     _fs_to_tree_path,
     blob_from_path_and_stat,
     blob_from_path_and_stat,
     build_file_from_blob,
     build_file_from_blob,
+    build_index_from_tree,
     get_unstaged_changes,
     get_unstaged_changes,
+    index_entry_from_stat,
+    symlink,
     update_working_tree,
     update_working_tree,
+    validate_path_element_default,
+    validate_path_element_hfs,
+    validate_path_element_ntfs,
 )
 )
 from .object_store import tree_lookup_path
 from .object_store import tree_lookup_path
 from .objects import (
 from .objects import (
+    Blob,
     Commit,
     Commit,
     Tag,
     Tag,
     Tree,
     Tree,
@@ -130,7 +138,12 @@ from .objectspec import (
     parse_tree,
     parse_tree,
 )
 )
 from .pack import write_pack_from_container, write_pack_index
 from .pack import write_pack_from_container, write_pack_index
-from .patch import write_commit_patch, write_tree_diff
+from .patch import (
+    get_summary,
+    write_commit_patch,
+    write_object_diff,
+    write_tree_diff,
+)
 from .protocol import ZERO_SHA, Protocol
 from .protocol import ZERO_SHA, Protocol
 from .refs import (
 from .refs import (
     LOCAL_BRANCH_PREFIX,
     LOCAL_BRANCH_PREFIX,
@@ -1255,7 +1268,6 @@ def diff(
       outstream: Stream to write to
       outstream: Stream to write to
     """
     """
     from . import diff as diff_module
     from . import diff as diff_module
-    from .objectspec import parse_commit
 
 
     with open_repo_closing(repo) as r:
     with open_repo_closing(repo) as r:
         # Normalize paths to bytes
         # Normalize paths to bytes
@@ -1272,8 +1284,6 @@ def diff(
 
 
         # Resolve commit refs to SHAs if provided
         # Resolve commit refs to SHAs if provided
         if commit is not None:
         if commit is not None:
-            from .objects import Commit
-
             if isinstance(commit, Commit):
             if isinstance(commit, Commit):
                 # Already a Commit object
                 # Already a Commit object
                 commit_sha = commit.id
                 commit_sha = commit.id
@@ -1288,9 +1298,6 @@ def diff(
 
 
         if commit2 is not None:
         if commit2 is not None:
             # Compare two commits
             # Compare two commits
-            from .objects import Commit
-            from .patch import write_object_diff
-
             if isinstance(commit2, Commit):
             if isinstance(commit2, Commit):
                 commit2_obj = commit2
                 commit2_obj = commit2
             else:
             else:
@@ -1416,8 +1423,6 @@ def submodule_update(repo, paths=None, init=False, force=False, errstream=None)
       init: If True, initialize submodules first
       init: If True, initialize submodules first
       force: Force update even if local changes exist
       force: Force update even if local changes exist
     """
     """
-    from .client import get_transport_and_path
-    from .index import build_index_from_tree
     from .submodule import iter_cached_submodules
     from .submodule import iter_cached_submodules
 
 
     with open_repo_closing(repo) as r:
     with open_repo_closing(repo) as r:
@@ -1809,7 +1814,6 @@ def reset(repo, mode, treeish: Union[str, bytes, Commit, Tree, Tag] = "HEAD") ->
 
 
         elif mode == "mixed":
         elif mode == "mixed":
             # Mixed reset: update HEAD and index, but leave working tree unchanged
             # Mixed reset: update HEAD and index, but leave working tree unchanged
-            from .index import IndexEntry
             from .object_store import iter_tree_contents
             from .object_store import iter_tree_contents
 
 
             # Open the index
             # Open the index
@@ -1852,13 +1856,6 @@ def reset(repo, mode, treeish: Union[str, bytes, Commit, Tree, Tag] = "HEAD") ->
             config = r.get_config()
             config = r.get_config()
             honor_filemode = config.get_boolean(b"core", b"filemode", os.name != "nt")
             honor_filemode = config.get_boolean(b"core", b"filemode", os.name != "nt")
 
 
-            # Import validation functions
-            from .index import (
-                validate_path_element_default,
-                validate_path_element_hfs,
-                validate_path_element_ntfs,
-            )
-
             if config.get_boolean(b"core", b"core.protectNTFS", os.name == "nt"):
             if config.get_boolean(b"core", b"core.protectNTFS", os.name == "nt"):
                 validate_path_element = validate_path_element_ntfs
                 validate_path_element = validate_path_element_ntfs
             elif config.get_boolean(
             elif config.get_boolean(
@@ -1869,9 +1866,6 @@ def reset(repo, mode, treeish: Union[str, bytes, Commit, Tree, Tag] = "HEAD") ->
                 validate_path_element = validate_path_element_default
                 validate_path_element = validate_path_element_default
 
 
             if config.get_boolean(b"core", b"symlinks", True):
             if config.get_boolean(b"core", b"symlinks", True):
-                # Import symlink function
-                from .index import symlink
-
                 symlink_fn = symlink
                 symlink_fn = symlink
             else:
             else:
 
 
@@ -3074,9 +3068,10 @@ def update_head(repo, target, detached=False, new_branch=None) -> None:
 
 
 def checkout(
 def checkout(
     repo,
     repo,
-    target: Union[str, bytes, Commit, Tag],
+    target: Optional[Union[str, bytes, Commit, Tag]] = None,
     force: bool = False,
     force: bool = False,
     new_branch: Optional[Union[bytes, str]] = None,
     new_branch: Optional[Union[bytes, str]] = None,
+    paths: Optional[list[Union[bytes, str]]] = None,
 ) -> None:
 ) -> None:
     """Switch to a branch or commit, updating both HEAD and the working tree.
     """Switch to a branch or commit, updating both HEAD and the working tree.
 
 
@@ -3086,9 +3081,12 @@ def checkout(
 
 
     Args:
     Args:
       repo: Path to repository or repository object
       repo: Path to repository or repository object
-      target: Branch name, tag, or commit SHA to checkout
+      target: Branch name, tag, or commit SHA to checkout. If None and paths are
+              specified, files are restored from HEAD
       force: Force checkout even if there are local changes
       force: Force checkout even if there are local changes
       new_branch: Create a new branch at target (like git checkout -b)
       new_branch: Create a new branch at target (like git checkout -b)
+      paths: List of specific paths to checkout. If specified, only these paths are updated
+             and HEAD is not changed
 
 
     Raises:
     Raises:
       CheckoutError: If checkout cannot be performed due to conflicts
       CheckoutError: If checkout cannot be performed due to conflicts
@@ -3097,6 +3095,77 @@ def checkout(
     with open_repo_closing(repo) as r:
     with open_repo_closing(repo) as r:
         # Store the original target for later reference checks
         # Store the original target for later reference checks
         original_target = target
         original_target = target
+        # Handle path-specific checkout (like git checkout -- <paths>)
+        if paths is not None:
+            # Convert paths to bytes
+            byte_paths = []
+            for path in paths:
+                if isinstance(path, str):
+                    byte_paths.append(path.encode(DEFAULT_ENCODING))
+                else:
+                    byte_paths.append(path)
+
+            # If no target specified, use HEAD
+            if target is None:
+                try:
+                    target = r.refs[b"HEAD"]
+                except KeyError:
+                    raise CheckoutError("No HEAD reference found")
+            else:
+                if isinstance(target, str):
+                    target = target.encode(DEFAULT_ENCODING)
+
+            # Get the target commit and tree
+            target_commit = parse_commit(r, target)
+            target_tree = r[target_commit.tree]
+
+            # Get blob normalizer for line ending conversion
+            blob_normalizer = r.get_blob_normalizer()
+
+            # Restore specified paths from target tree
+            for path in byte_paths:
+                try:
+                    # Look up the path in the target tree
+                    mode, sha = target_tree.lookup_path(
+                        r.object_store.__getitem__, path
+                    )
+                    obj = r[sha]
+
+                    # Create directories if needed
+                    # Handle path as string
+                    if isinstance(path, bytes):
+                        path_str = path.decode(DEFAULT_ENCODING)
+                    else:
+                        path_str = path
+                    file_path = os.path.join(r.path, path_str)
+                    os.makedirs(os.path.dirname(file_path), exist_ok=True)
+
+                    # Write the file content
+                    if stat.S_ISREG(mode):
+                        # Apply checkout filters (smudge)
+                        if blob_normalizer:
+                            obj = blob_normalizer.checkout_normalize(obj, path)
+
+                        flags = os.O_WRONLY | os.O_CREAT | os.O_TRUNC
+                        if sys.platform == "win32":
+                            flags |= os.O_BINARY
+
+                        with os.fdopen(os.open(file_path, flags, mode), "wb") as f:
+                            f.write(obj.data)
+
+                    # Update the index
+                    r.stage(path)
+
+                except KeyError:
+                    # Path doesn't exist in target tree
+                    pass
+
+            return
+
+        # Normal checkout (switching branches/commits)
+        if target is None:
+            raise ValueError("Target must be specified for branch/commit checkout")
+
         if isinstance(target, str):
         if isinstance(target, str):
             target_bytes = target.encode(DEFAULT_ENCODING)
             target_bytes = target.encode(DEFAULT_ENCODING)
         elif isinstance(target, bytes):
         elif isinstance(target, bytes):
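A usage sketch for the new paths argument (the file name and the "main" ref are illustrative):

    from dulwich import porcelain

    # Restore a single file from HEAD without moving HEAD (like `git checkout -- <path>`)
    porcelain.checkout("/path/to/repo", paths=["README.md"])

    # Restore the same file from another branch or commit
    porcelain.checkout("/path/to/repo", "main", paths=["README.md"])
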
@@ -3109,6 +3178,9 @@ def checkout(
             new_branch = new_branch.encode(DEFAULT_ENCODING)
             new_branch = new_branch.encode(DEFAULT_ENCODING)
 
 
         # Parse the target to get the commit
         # Parse the target to get the commit
+        assert (
+            original_target is not None
+        )  # Guaranteed by earlier check for normal checkout
         target_commit = parse_commit(r, original_target)
         target_commit = parse_commit(r, original_target)
         target_tree_id = target_commit.tree
         target_tree_id = target_commit.tree
 
 
@@ -3153,18 +3225,12 @@ def checkout(
         config = r.get_config()
         config = r.get_config()
         honor_filemode = config.get_boolean(b"core", b"filemode", os.name != "nt")
         honor_filemode = config.get_boolean(b"core", b"filemode", os.name != "nt")
 
 
-        # Import validation functions
-        from .index import validate_path_element_default, validate_path_element_ntfs
-
         if config.get_boolean(b"core", b"core.protectNTFS", os.name == "nt"):
         if config.get_boolean(b"core", b"core.protectNTFS", os.name == "nt"):
             validate_path_element = validate_path_element_ntfs
             validate_path_element = validate_path_element_ntfs
         else:
         else:
             validate_path_element = validate_path_element_default
             validate_path_element = validate_path_element_default
 
 
         if config.get_boolean(b"core", b"symlinks", True):
         if config.get_boolean(b"core", b"symlinks", True):
-            # Import symlink function
-            from .index import symlink
-
             symlink_fn = symlink
             symlink_fn = symlink
         else:
         else:
 
 
@@ -4621,8 +4687,6 @@ def format_patch(
                 )
                 )
             else:
             else:
                 # Generate filename
                 # Generate filename
-                from .patch import get_summary
-
                 summary = get_summary(commit)
                 summary = get_summary(commit)
                 filename = os.path.join(outdir, f"{i:04d}-{summary}.patch")
                 filename = os.path.join(outdir, f"{i:04d}-{summary}.patch")
 
 
@@ -4865,3 +4929,615 @@ def reflog(repo=".", ref=b"HEAD", all=False):
                 # Read the reflog entries for this ref
                 # Read the reflog entries for this ref
                 for entry in r.read_reflog(ref_bytes):
                 for entry in r.read_reflog(ref_bytes):
                     yield (ref_bytes, entry)
                     yield (ref_bytes, entry)
+
+
+def lfs_track(repo=".", patterns=None):
+    """Track file patterns with Git LFS.
+
+    Args:
+      repo: Path to repository
+      patterns: List of file patterns to track (e.g., ["*.bin", "*.pdf"])
+                If None, returns current tracked patterns
+
+    Returns:
+      List of tracked patterns
+    """
+    from .attrs import GitAttributes
+
+    with open_repo_closing(repo) as r:
+        gitattributes_path = os.path.join(r.path, ".gitattributes")
+
+        # Load existing GitAttributes
+        if os.path.exists(gitattributes_path):
+            gitattributes = GitAttributes.from_file(gitattributes_path)
+        else:
+            gitattributes = GitAttributes()
+
+        if patterns is None:
+            # Return current LFS tracked patterns
+            tracked = []
+            for pattern_obj, attrs in gitattributes:
+                if attrs.get(b"filter") == b"lfs":
+                    tracked.append(pattern_obj.pattern.decode())
+            return tracked
+
+        # Add new patterns
+        for pattern in patterns:
+            # Ensure pattern is bytes
+            if isinstance(pattern, str):
+                pattern = pattern.encode()
+
+            # Set LFS attributes for the pattern
+            gitattributes.set_attribute(pattern, b"filter", b"lfs")
+            gitattributes.set_attribute(pattern, b"diff", b"lfs")
+            gitattributes.set_attribute(pattern, b"merge", b"lfs")
+            gitattributes.set_attribute(pattern, b"text", False)
+
+        # Write updated attributes
+        gitattributes.write_to_file(gitattributes_path)
+
+        # Stage the .gitattributes file
+        add(r, [".gitattributes"])
+
+        return lfs_track(r)  # Return updated list
+
+
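A usage sketch for lfs_track (the repository path and patterns are illustrative):

    from dulwich import porcelain

    porcelain.lfs_track("/path/to/repo", ["*.bin", "*.psd"])
    # Without patterns, the currently tracked patterns are returned
    print(porcelain.lfs_track("/path/to/repo"))
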
+def lfs_untrack(repo=".", patterns=None):
+    """Untrack file patterns from Git LFS.
+
+    Args:
+      repo: Path to repository
+      patterns: List of file patterns to untrack
+
+    Returns:
+      List of remaining tracked patterns
+    """
+    from .attrs import GitAttributes
+
+    if not patterns:
+        return lfs_track(repo)
+
+    with open_repo_closing(repo) as r:
+        gitattributes_path = os.path.join(r.path, ".gitattributes")
+
+        if not os.path.exists(gitattributes_path):
+            return []
+
+        # Load existing GitAttributes
+        gitattributes = GitAttributes.from_file(gitattributes_path)
+
+        # Remove specified patterns
+        for pattern in patterns:
+            if isinstance(pattern, str):
+                pattern = pattern.encode()
+
+            # Check if pattern is tracked by LFS
+            for pattern_obj, attrs in list(gitattributes):
+                if pattern_obj.pattern == pattern and attrs.get(b"filter") == b"lfs":
+                    gitattributes.remove_pattern(pattern)
+                    break
+
+        # Write updated attributes
+        gitattributes.write_to_file(gitattributes_path)
+
+        # Stage the .gitattributes file
+        add(r, [".gitattributes"])
+
+        return lfs_track(r)  # Return updated list
+
+
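Correspondingly, untracking a pattern returns the patterns that remain tracked (values are illustrative):

    from dulwich import porcelain

    remaining = porcelain.lfs_untrack("/path/to/repo", ["*.psd"])
    print(remaining)  # e.g. ["*.bin"]
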
+def lfs_init(repo="."):
+    """Initialize Git LFS in a repository.
+
+    Args:
+      repo: Path to repository
+
+    Returns:
+      None
+    """
+    from .lfs import LFSStore
+
+    with open_repo_closing(repo) as r:
+        # Create LFS store
+        LFSStore.from_repo(r, create=True)
+
+        # Set up Git config for LFS
+        config = r.get_config()
+        config.set((b"filter", b"lfs"), b"process", b"git-lfs filter-process")
+        config.set((b"filter", b"lfs"), b"required", b"true")
+        config.set((b"filter", b"lfs"), b"clean", b"git-lfs clean -- %f")
+        config.set((b"filter", b"lfs"), b"smudge", b"git-lfs smudge -- %f")
+        config.write_to_path()
+
+
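A minimal sketch of initializing LFS before tracking anything (the path is illustrative):

    from dulwich import porcelain

    porcelain.lfs_init("/path/to/repo")   # creates the LFS store and writes filter.lfs.* config
    porcelain.lfs_track("/path/to/repo", ["*.bin"])
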
+def lfs_clean(repo=".", path=None):
+    """Clean a file by converting it to an LFS pointer.
+
+    Args:
+      repo: Path to repository
+      path: Path to file to clean (relative to repo root)
+
+    Returns:
+      LFS pointer content as bytes
+    """
+    from .lfs import LFSFilterDriver, LFSStore
+
+    with open_repo_closing(repo) as r:
+        if path is None:
+            raise ValueError("Path must be specified")
+
+        # Get LFS store
+        lfs_store = LFSStore.from_repo(r)
+        filter_driver = LFSFilterDriver(lfs_store)
+
+        # Read file content
+        full_path = os.path.join(r.path, path)
+        with open(full_path, "rb") as f:
+            content = f.read()
+
+        # Clean the content (convert to LFS pointer)
+        return filter_driver.clean(content)
+
+
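A sketch of cleaning a file into a pointer (the file name is illustrative); the returned bytes follow the pointer layout exercised by the compat tests further down:

    from dulwich import porcelain

    pointer = porcelain.lfs_clean("/path/to/repo", "assets/logo.bin")
    # pointer looks like:
    #   version https://git-lfs.github.com/spec/v1
    #   oid sha256:<64 hex digits>
    #   size <byte count>
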
+def lfs_smudge(repo=".", pointer_content=None):
+    """Smudge an LFS pointer by retrieving the actual content.
+
+    Args:
+      repo: Path to repository
+      pointer_content: LFS pointer content as bytes
+
+    Returns:
+      Actual file content as bytes
+    """
+    from .lfs import LFSFilterDriver, LFSStore
+
+    with open_repo_closing(repo) as r:
+        if pointer_content is None:
+            raise ValueError("Pointer content must be specified")
+
+        # Get LFS store
+        lfs_store = LFSStore.from_repo(r)
+        filter_driver = LFSFilterDriver(lfs_store)
+
+        # Smudge the pointer (retrieve actual content)
+        return filter_driver.smudge(pointer_content)
+
+
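And the reverse direction, assuming the object is present in the local LFS store:

    from dulwich import porcelain

    pointer = porcelain.lfs_clean("/path/to/repo", "assets/logo.bin")
    content = porcelain.lfs_smudge("/path/to/repo", pointer)  # original file bytes
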
+def lfs_ls_files(repo=".", ref=None):
+    """List files tracked by Git LFS.
+
+    Args:
+      repo: Path to repository
+      ref: Git ref to check (defaults to HEAD)
+
+    Returns:
+      List of (path, oid, size) tuples for LFS files
+    """
+    from .lfs import LFSPointer
+    from .object_store import iter_tree_contents
+
+    with open_repo_closing(repo) as r:
+        if ref is None:
+            ref = b"HEAD"
+        elif isinstance(ref, str):
+            ref = ref.encode()
+
+        # Get the commit and tree
+        try:
+            commit = r[ref]
+            tree = r[commit.tree]
+        except KeyError:
+            return []
+
+        lfs_files = []
+
+        # Walk the tree
+        for path, mode, sha in iter_tree_contents(r.object_store, tree.id):
+            if not stat.S_ISREG(mode):
+                continue
+
+            # Check if it's an LFS pointer
+            obj = r.object_store[sha]
+            pointer = LFSPointer.from_bytes(obj.data)
+            if pointer is not None:
+                lfs_files.append((path.decode(), pointer.oid, pointer.size))
+
+        return lfs_files
+
+
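A sketch of listing LFS files at HEAD (output formatting is illustrative):

    from dulwich import porcelain

    for path, oid, size in porcelain.lfs_ls_files("/path/to/repo"):
        print(f"{oid[:8]} {size:>10}  {path}")
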
+def lfs_migrate(repo=".", include=None, exclude=None, everything=False):
+    """Migrate files to Git LFS.
+
+    Args:
+      repo: Path to repository
+      include: Patterns of files to include
+      exclude: Patterns of files to exclude
+      everything: Migrate all files larger than 100MB, regardless of include/exclude patterns
+
+    Returns:
+      Number of migrated files
+    """
+    from .lfs import LFSFilterDriver, LFSStore
+
+    with open_repo_closing(repo) as r:
+        # Initialize LFS if needed
+        lfs_store = LFSStore.from_repo(r, create=True)
+        filter_driver = LFSFilterDriver(lfs_store)
+
+        # Get current index
+        index = r.open_index()
+
+        migrated = 0
+
+        # Determine files to migrate
+        files_to_migrate = []
+
+        if everything:
+            # Migrate all files above 100MB
+            for path, entry in index.items():
+                full_path = os.path.join(r.path, path.decode())
+                if os.path.exists(full_path):
+                    size = os.path.getsize(full_path)
+                    if size > 100 * 1024 * 1024:  # 100MB
+                        files_to_migrate.append(path.decode())
+        else:
+            # Use include/exclude patterns
+            for path, entry in index.items():
+                path_str = path.decode()
+
+                # Check include patterns
+                if include:
+                    matched = any(
+                        fnmatch.fnmatch(path_str, pattern) for pattern in include
+                    )
+                    if not matched:
+                        continue
+
+                # Check exclude patterns
+                if exclude:
+                    excluded = any(
+                        fnmatch.fnmatch(path_str, pattern) for pattern in exclude
+                    )
+                    if excluded:
+                        continue
+
+                files_to_migrate.append(path_str)
+
+        # Migrate files
+        for path in files_to_migrate:
+            full_path = os.path.join(r.path, path)
+            if not os.path.exists(full_path):
+                continue
+
+            # Read file content
+            with open(full_path, "rb") as f:
+                content = f.read()
+
+            # Convert to LFS pointer
+            pointer_content = filter_driver.clean(content)
+
+            # Write pointer back to file
+            with open(full_path, "wb") as f:
+                f.write(pointer_content)
+
+            # Create blob for pointer content and update index
+            blob = Blob()
+            blob.data = pointer_content
+            r.object_store.add_object(blob)
+
+            st = os.stat(full_path)
+            index_entry = index_entry_from_stat(st, blob.id, 0)
+            index[path.encode()] = index_entry
+
+            migrated += 1
+
+        # Write updated index
+        index.write()
+
+        # Track patterns if include was specified
+        if include:
+            lfs_track(r, include)
+
+        return migrated
+
+
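A sketch of migrating existing files by pattern (the patterns are illustrative):

    from dulwich import porcelain

    migrated = porcelain.lfs_migrate("/path/to/repo", include=["*.iso"], exclude=["docs/*"])
    print(f"migrated {migrated} file(s)")
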
+def lfs_pointer_check(repo=".", paths=None):
+    """Check if files are valid LFS pointers.
+
+    Args:
+      repo: Path to repository
+      paths: List of file paths to check (if None, check all files)
+
+    Returns:
+      Dict mapping paths to LFSPointer objects (or None if not a pointer)
+    """
+    from .lfs import LFSPointer
+
+    with open_repo_closing(repo) as r:
+        results = {}
+
+        if paths is None:
+            # Check all files in index
+            index = r.open_index()
+            paths = [path.decode() for path in index]
+
+        for path in paths:
+            full_path = os.path.join(r.path, path)
+            if os.path.exists(full_path):
+                try:
+                    with open(full_path, "rb") as f:
+                        content = f.read()
+                    pointer = LFSPointer.from_bytes(content)
+                    results[path] = pointer
+                except OSError:
+                    results[path] = None
+            else:
+                results[path] = None
+
+        return results
+
+
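A sketch of checking which paths hold valid pointers (paths are illustrative):

    from dulwich import porcelain

    results = porcelain.lfs_pointer_check("/path/to/repo", ["assets/logo.bin", "README.md"])
    for path, pointer in results.items():
        print(path, "-> LFS pointer" if pointer else "-> regular file")
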
+def lfs_fetch(repo=".", remote="origin", refs=None):
+    """Fetch LFS objects from remote.
+
+    Args:
+      repo: Path to repository
+      remote: Remote name (default: origin)
+      refs: Specific refs to fetch LFS objects for (default: all refs)
+
+    Returns:
+      Number of objects fetched
+    """
+    from .lfs import LFSClient, LFSPointer, LFSStore
+
+    with open_repo_closing(repo) as r:
+        # Get LFS server URL from config
+        config = r.get_config()
+        lfs_url = config.get((b"lfs",), b"url")
+        if not lfs_url:
+            # Try remote URL
+            remote_url = config.get((b"remote", remote.encode()), b"url")
+            if remote_url:
+                # Append /info/lfs to remote URL
+                remote_url = remote_url.decode()
+                if remote_url.endswith(".git"):
+                    remote_url = remote_url[:-4]
+                lfs_url = f"{remote_url}/info/lfs"
+            else:
+                raise ValueError(f"No LFS URL configured for remote {remote}")
+        else:
+            lfs_url = lfs_url.decode()
+
+        # Get authentication
+        auth = None
+        # TODO: Support credential helpers and other auth methods
+
+        # Create LFS client and store
+        client = LFSClient(lfs_url, auth)
+        store = LFSStore.from_repo(r)
+
+        # Find all LFS pointers in the refs
+        pointers_to_fetch = []
+
+        if refs is None:
+            # Get all refs
+            refs = list(r.refs.keys())
+
+        for ref in refs:
+            if isinstance(ref, str):
+                ref = ref.encode()
+            try:
+                commit = r[r.refs[ref]]
+            except KeyError:
+                continue
+
+            # Walk the commit tree
+            for entry in r.object_store.iter_tree_contents(commit.tree):
+                try:
+                    obj = r.object_store[entry.sha]
+                    if obj.type_name == b"blob":
+                        pointer = LFSPointer.from_bytes(obj.data)
+                        if pointer and pointer.is_valid_oid():
+                            # Check if we already have it
+                            try:
+                                store.open_object(pointer.oid)
+                            except KeyError:
+                                pointers_to_fetch.append((pointer.oid, pointer.size))
+                except KeyError:
+                    pass
+
+        # Fetch missing objects
+        fetched = 0
+        for oid, size in pointers_to_fetch:
+            try:
+                content = client.download(oid, size)
+                store.write_object([content])
+                fetched += 1
+            except Exception as e:
+                # Log error but continue
+                print(f"Failed to fetch {oid}: {e}")
+
+        return fetched
+
+
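A sketch of fetching missing objects, assuming either lfs.url or a usable remote URL is configured:

    from dulwich import porcelain

    fetched = porcelain.lfs_fetch("/path/to/repo", remote="origin", refs=[b"HEAD"])
    print(f"fetched {fetched} object(s)")
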
+def lfs_pull(repo=".", remote="origin"):
+    """Pull LFS objects for current checkout.
+
+    Args:
+      repo: Path to repository
+      remote: Remote name (default: origin)
+
+    Returns:
+      Number of objects fetched
+    """
+    from .lfs import LFSPointer, LFSStore
+
+    with open_repo_closing(repo) as r:
+        # First do a fetch for HEAD
+        fetched = lfs_fetch(repo, remote, [b"HEAD"])
+
+        # Then checkout LFS files in working directory
+        store = LFSStore.from_repo(r)
+        index = r.open_index()
+
+        for path, entry in index.items():
+            full_path = os.path.join(r.path, path.decode())
+            if os.path.exists(full_path):
+                with open(full_path, "rb") as f:
+                    content = f.read()
+
+                pointer = LFSPointer.from_bytes(content)
+                if pointer and pointer.is_valid_oid():
+                    try:
+                        # Replace pointer with actual content
+                        with store.open_object(pointer.oid) as lfs_file:
+                            lfs_content = lfs_file.read()
+                        with open(full_path, "wb") as f:
+                            f.write(lfs_content)
+                    except KeyError:
+                        # Object not available
+                        pass
+
+        return fetched
+
+
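lfs_pull combines the fetch with replacing pointers in the working tree:

    from dulwich import porcelain

    porcelain.lfs_pull("/path/to/repo")  # fetch objects for HEAD, then materialize them
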
+def lfs_push(repo=".", remote="origin", refs=None):
+    """Push LFS objects to remote.
+
+    Args:
+      repo: Path to repository
+      remote: Remote name (default: origin)
+      refs: Specific refs to push LFS objects for (default: current branch)
+
+    Returns:
+      Number of objects pushed
+    """
+    from .lfs import LFSClient, LFSPointer, LFSStore
+
+    with open_repo_closing(repo) as r:
+        # Get LFS server URL from config
+        config = r.get_config()
+        lfs_url = config.get((b"lfs",), b"url")
+        if not lfs_url:
+            # Try remote URL
+            remote_url = config.get((b"remote", remote.encode()), b"url")
+            if remote_url:
+                # Append /info/lfs to remote URL
+                remote_url = remote_url.decode()
+                if remote_url.endswith(".git"):
+                    remote_url = remote_url[:-4]
+                lfs_url = f"{remote_url}/info/lfs"
+            else:
+                raise ValueError(f"No LFS URL configured for remote {remote}")
+        else:
+            lfs_url = lfs_url.decode()
+
+        # Get authentication
+        auth = None
+        # TODO: Support credential helpers and other auth methods
+
+        # Create LFS client and store
+        client = LFSClient(lfs_url, auth)
+        store = LFSStore.from_repo(r)
+
+        # Find all LFS objects to push
+        if refs is None:
+            # Push current branch
+            refs = [r.refs.read_ref(b"HEAD")]
+
+        objects_to_push = set()
+
+        for ref in refs:
+            if isinstance(ref, str):
+                ref = ref.encode()
+            try:
+                if ref.startswith(b"refs/"):
+                    commit = r[r.refs[ref]]
+                else:
+                    commit = r[ref]
+            except KeyError:
+                continue
+
+            # Walk the commit tree
+            for entry in r.object_store.iter_tree_contents(commit.tree):
+                try:
+                    obj = r.object_store[entry.sha]
+                    if obj.type_name == b"blob":
+                        pointer = LFSPointer.from_bytes(obj.data)
+                        if pointer and pointer.is_valid_oid():
+                            objects_to_push.add((pointer.oid, pointer.size))
+                except KeyError:
+                    pass
+
+        # Push objects
+        pushed = 0
+        for oid, size in objects_to_push:
+            try:
+                with store.open_object(oid) as f:
+                    content = f.read()
+                client.upload(oid, size, content)
+                pushed += 1
+            except KeyError:
+                # Object not in local store
+                print(f"Warning: LFS object {oid} not found locally")
+            except Exception as e:
+                # Log error but continue
+                print(f"Failed to push {oid}: {e}")
+
+        return pushed
+
+
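The push direction mirrors the fetch sketch above:

    from dulwich import porcelain

    pushed = porcelain.lfs_push("/path/to/repo", remote="origin")
    print(f"pushed {pushed} object(s)")
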
+def lfs_status(repo="."):
+    """Show status of LFS files.
+
+    Args:
+      repo: Path to repository
+
+    Returns:
+      Dict with the keys "tracked", "not_staged", "not_committed", "not_pushed" and
+      "missing", each mapping to a list of file paths
+    """
+    from .lfs import LFSPointer, LFSStore
+
+    with open_repo_closing(repo) as r:
+        store = LFSStore.from_repo(r)
+        index = r.open_index()
+
+        status = {
+            "tracked": [],
+            "not_staged": [],
+            "not_committed": [],
+            "not_pushed": [],
+            "missing": [],
+        }
+
+        # Check working directory files
+        for path, entry in index.items():
+            path_str = path.decode()
+            full_path = os.path.join(r.path, path_str)
+
+            if os.path.exists(full_path):
+                with open(full_path, "rb") as f:
+                    content = f.read()
+
+                pointer = LFSPointer.from_bytes(content)
+                if pointer and pointer.is_valid_oid():
+                    status["tracked"].append(path_str)
+
+                    # Check if object exists locally
+                    try:
+                        store.open_object(pointer.oid)
+                    except KeyError:
+                        status["missing"].append(path_str)
+
+                    # Check if file has been modified
+                    try:
+                        staged_obj = r.object_store[entry.binsha]
+                        staged_pointer = LFSPointer.from_bytes(staged_obj.data)
+                        if staged_pointer and staged_pointer.oid != pointer.oid:
+                            status["not_staged"].append(path_str)
+                    except KeyError:
+                        pass
+
+        # TODO: Check for not committed and not pushed files
+
+        return status

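A sketch of inspecting the returned status dictionary (keys come from the implementation above):

    from dulwich import porcelain

    status = porcelain.lfs_status("/path/to/repo")
    for path in status["missing"]:
        print(f"LFS object not present locally: {path}")
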
+ 97 - 16
dulwich/repo.py

@@ -73,7 +73,6 @@ from .hooks import (
     PostReceiveShellHook,
     PostReceiveShellHook,
     PreCommitShellHook,
     PreCommitShellHook,
 )
 )
-from .line_ending import BlobNormalizer, TreeBlobNormalizer
 from .object_store import (
 from .object_store import (
     DiskObjectStore,
     DiskObjectStore,
     MemoryObjectStore,
     MemoryObjectStore,
@@ -761,6 +760,24 @@ class BaseRepo:
         """
         """
         raise NotImplementedError(self.get_rebase_state_manager)
         raise NotImplementedError(self.get_rebase_state_manager)
 
 
+    def get_blob_normalizer(self):
+        """Return a BlobNormalizer object for checkin/checkout operations.
+
+        Returns: BlobNormalizer instance
+        """
+        raise NotImplementedError(self.get_blob_normalizer)
+
+    def get_gitattributes(self, tree: Optional[bytes] = None) -> "GitAttributes":
+        """Read gitattributes for the repository.
+
+        Args:
+            tree: Tree SHA to read .gitattributes from (defaults to HEAD)
+
+        Returns:
+            GitAttributes object that can be used to match paths
+        """
+        raise NotImplementedError(self.get_gitattributes)
+
     def get_config_stack(self) -> "StackedConfig":
     def get_config_stack(self) -> "StackedConfig":
         """Return a config stack for this repository.
         """Return a config stack for this repository.
 
 
@@ -2078,24 +2095,58 @@ class Repo(BaseRepo):
     def __exit__(self, exc_type, exc_val, exc_tb):
     def __exit__(self, exc_type, exc_val, exc_tb):
         self.close()
         self.close()
 
 
+    def _read_gitattributes(self) -> dict[bytes, dict[bytes, bytes]]:
+        """Read .gitattributes file from working tree.
+
+        Returns:
+            Dictionary mapping file patterns to attributes
+        """
+        gitattributes = {}
+        gitattributes_path = os.path.join(self.path, ".gitattributes")
+
+        if os.path.exists(gitattributes_path):
+            with open(gitattributes_path, "rb") as f:
+                for line in f:
+                    line = line.strip()
+                    if not line or line.startswith(b"#"):
+                        continue
+
+                    parts = line.split()
+                    if len(parts) < 2:
+                        continue
+
+                    pattern = parts[0]
+                    attrs = {}
+
+                    for attr in parts[1:]:
+                        if attr.startswith(b"-"):
+                            # Unset attribute
+                            attrs[attr[1:]] = b"false"
+                        elif b"=" in attr:
+                            # Set to value
+                            key, value = attr.split(b"=", 1)
+                            attrs[key] = value
+                        else:
+                            # Set attribute
+                            attrs[attr] = b"true"
+
+                    gitattributes[pattern] = attrs
+
+        return gitattributes
+
     def get_blob_normalizer(self):
     def get_blob_normalizer(self):
         """Return a BlobNormalizer object."""
         """Return a BlobNormalizer object."""
-        # TODO Parse the git attributes files
-        git_attributes = {}
+        from .filters import FilterBlobNormalizer, FilterRegistry
+
+        # Get proper GitAttributes object
+        git_attributes = self.get_gitattributes()
         config_stack = self.get_config_stack()
         config_stack = self.get_config_stack()
-        try:
-            head_sha = self.refs[b"HEAD"]
-            # Peel tags to get the underlying commit
-            _, obj = peel_sha(self.object_store, head_sha)
-            tree = obj.tree
-            return TreeBlobNormalizer(
-                config_stack,
-                git_attributes,
-                self.object_store,
-                tree,
-            )
-        except KeyError:
-            return BlobNormalizer(config_stack, git_attributes)
+
+        # Create FilterRegistry with repo reference
+        filter_registry = FilterRegistry(config_stack, self)
+
+        # Return FilterBlobNormalizer which handles all filters including line endings
+        return FilterBlobNormalizer(config_stack, git_attributes, filter_registry, self)
 
 
     def get_gitattributes(self, tree: Optional[bytes] = None) -> "GitAttributes":
     def get_gitattributes(self, tree: Optional[bytes] = None) -> "GitAttributes":
         """Read gitattributes for the repository.
         """Read gitattributes for the repository.
@@ -2152,6 +2203,14 @@ class Repo(BaseRepo):
                     pattern = Pattern(pattern_bytes)
                     pattern = Pattern(pattern_bytes)
                     patterns.append((pattern, attrs))
                     patterns.append((pattern, attrs))
 
 
+        # Read .gitattributes from working directory (if it exists)
+        working_attrs_path = os.path.join(self.path, ".gitattributes")
+        if os.path.exists(working_attrs_path):
+            with open(working_attrs_path, "rb") as f:
+                for pattern_bytes, attrs in parse_git_attributes(f):
+                    pattern = Pattern(pattern_bytes)
+                    patterns.append((pattern, attrs))
+
         return GitAttributes(patterns)
         return GitAttributes(patterns)
 
 
     def _sparse_checkout_file_path(self) -> str:
     def _sparse_checkout_file_path(self) -> str:
@@ -2318,6 +2377,28 @@ class MemoryRepo(BaseRepo):
 
 
         return MemoryRebaseStateManager(self)
         return MemoryRebaseStateManager(self)
 
 
+    def get_blob_normalizer(self):
+        """Return a BlobNormalizer object for checkin/checkout operations."""
+        from .filters import FilterBlobNormalizer, FilterRegistry
+
+        # Get GitAttributes object
+        git_attributes = self.get_gitattributes()
+        config_stack = self.get_config_stack()
+
+        # Create FilterRegistry with repo reference
+        filter_registry = FilterRegistry(config_stack, self)
+
+        # Return FilterBlobNormalizer which handles all filters
+        return FilterBlobNormalizer(config_stack, git_attributes, filter_registry, self)
+
+    def get_gitattributes(self, tree: Optional[bytes] = None) -> "GitAttributes":
+        """Read gitattributes for the repository."""
+        from .attrs import GitAttributes
+
+        # Memory repos don't have working trees or gitattributes files
+        # Return empty GitAttributes
+        return GitAttributes([])
+
     @classmethod
     @classmethod
     def init_bare(cls, objects, refs, format: Optional[int] = None):
     def init_bare(cls, objects, refs, format: Optional[int] = None):
         """Create a new bare repository in memory.
         """Create a new bare repository in memory.

+ 2 - 0
tests/__init__.py

@@ -155,6 +155,8 @@ def self_test_suite():
         "patch",
         "patch",
         "porcelain",
         "porcelain",
         "porcelain_cherry_pick",
         "porcelain_cherry_pick",
+        "porcelain_filters",
+        "porcelain_lfs",
         "porcelain_merge",
         "porcelain_merge",
         "porcelain_notes",
         "porcelain_notes",
         "protocol",
         "protocol",

+ 1 - 0
tests/compat/__init__.py

@@ -32,6 +32,7 @@ def test_suite():
         "commit_graph",
         "commit_graph",
         "dumb",
         "dumb",
         "index",
         "index",
+        "lfs",
         "pack",
         "pack",
         "patch",
         "patch",
         "porcelain",
         "porcelain",

+ 7 - 0
tests/compat/server_utils.py

@@ -158,6 +158,7 @@ class ServerTests:
         new_repo_dir = os.path.join(new_repo_base_dir, "empty_new")
         new_repo_dir = os.path.join(new_repo_base_dir, "empty_new")
         run_git_or_fail(["clone", self.url(port), new_repo_dir], cwd=new_repo_base_dir)
         run_git_or_fail(["clone", self.url(port), new_repo_dir], cwd=new_repo_base_dir)
         new_repo = Repo(new_repo_dir)
         new_repo = Repo(new_repo_dir)
+        self.addCleanup(new_repo.close)
         self.assertReposEqual(self._old_repo, new_repo)
         self.assertReposEqual(self._old_repo, new_repo)
 
 
     def test_lsremote_from_dulwich(self) -> None:
     def test_lsremote_from_dulwich(self) -> None:
@@ -184,7 +185,9 @@ class ServerTests:
                 self._stub_repo.path,
                 self._stub_repo.path,
             ]
             ]
         )
         )
+        self._stub_repo.close()
         clone = self._stub_repo = Repo(self._stub_repo.path)
         clone = self._stub_repo = Repo(self._stub_repo.path)
+        self.addCleanup(clone.close)
         expected_shallow = [
         expected_shallow = [
             b"35e0b59e187dd72a0af294aedffc213eaa4d03ff",
             b"35e0b59e187dd72a0af294aedffc213eaa4d03ff",
             b"514dc6d3fbfe77361bcaef320c4d21b72bc10be9",
             b"514dc6d3fbfe77361bcaef320c4d21b72bc10be9",
@@ -247,7 +250,9 @@ class ServerTests:
                 self._stub_repo.path,
                 self._stub_repo.path,
             ]
             ]
         )
         )
+        self._stub_repo.close()
         clone = self._stub_repo = Repo(self._stub_repo.path)
         clone = self._stub_repo = Repo(self._stub_repo.path)
+        self.addCleanup(clone.close)
 
 
         # Fetching at the same depth is a no-op.
         # Fetching at the same depth is a no-op.
         run_git_or_fail(
         run_git_or_fail(
@@ -279,7 +284,9 @@ class ServerTests:
                 self._stub_repo.path,
                 self._stub_repo.path,
             ]
             ]
         )
         )
+        self._stub_repo.close()
         clone = self._stub_repo = Repo(self._stub_repo.path)
         clone = self._stub_repo = Repo(self._stub_repo.path)
+        self.addCleanup(clone.close)
 
 
         # Fetching at the same depth is a no-op.
         # Fetching at the same depth is a no-op.
         run_git_or_fail(
         run_git_or_fail(

+ 9 - 0
tests/compat/test_commit_graph.py

@@ -137,6 +137,7 @@ class CommitGraphCompatTests(CompatTestCase):
 
 
         # Open the repository with dulwich
         # Open the repository with dulwich
         repo = Repo(self.repo_path)
         repo = Repo(self.repo_path)
+        self.addCleanup(repo.close)
 
 
         # Verify that all commits in the graph are accessible
         # Verify that all commits in the graph are accessible
         for entry in commit_graph:
         for entry in commit_graph:
@@ -165,6 +166,7 @@ class CommitGraphCompatTests(CompatTestCase):
         commits, work_dir = self.create_test_repo_with_history()
         commits, work_dir = self.create_test_repo_with_history()
 
 
         repo = Repo(self.repo_path)
         repo = Repo(self.repo_path)
+        self.addCleanup(repo.close)
 
 
         # Get some commit IDs for testing
         # Get some commit IDs for testing
         main_head = repo.refs[b"refs/heads/master"]
         main_head = repo.refs[b"refs/heads/master"]
@@ -177,7 +179,9 @@ class CommitGraphCompatTests(CompatTestCase):
         run_git_or_fail(["commit-graph", "write", "--reachable"], cwd=work_dir)
         run_git_or_fail(["commit-graph", "write", "--reachable"], cwd=work_dir)
 
 
         # Force reload of repository to pick up commit graph
         # Force reload of repository to pick up commit graph
+        repo.close()
         repo = Repo(self.repo_path)
         repo = Repo(self.repo_path)
+        self.addCleanup(repo.close)
 
 
         # Calculate merge base with commit graph
         # Calculate merge base with commit graph
         merge_base_with_graph = find_merge_base(repo, [main_head, feature_head])
         merge_base_with_graph = find_merge_base(repo, [main_head, feature_head])
@@ -206,6 +210,7 @@ class CommitGraphCompatTests(CompatTestCase):
         commits, work_dir = self.create_test_repo_with_history()
         commits, work_dir = self.create_test_repo_with_history()
 
 
         repo = Repo(self.repo_path)
         repo = Repo(self.repo_path)
+        self.addCleanup(repo.close)
 
 
         # Test with a simple fast-forward case (older commit to newer commit)
         # Test with a simple fast-forward case (older commit to newer commit)
         commit1 = commits[1]  # Second commit
         commit1 = commits[1]  # Second commit
@@ -218,7 +223,9 @@ class CommitGraphCompatTests(CompatTestCase):
         run_git_or_fail(["commit-graph", "write", "--reachable"], cwd=work_dir)
         run_git_or_fail(["commit-graph", "write", "--reachable"], cwd=work_dir)
 
 
         # Force reload
         # Force reload
+        repo.close()
         repo = Repo(self.repo_path)
         repo = Repo(self.repo_path)
+        self.addCleanup(repo.close)
 
 
         # Check with commit graph
         # Check with commit graph
         can_ff_with_graph = can_fast_forward(repo, commit1, commit2)
         can_ff_with_graph = can_fast_forward(repo, commit1, commit2)
@@ -259,6 +266,7 @@ class CommitGraphCompatTests(CompatTestCase):
         commit_graph = read_commit_graph(graph_file)
         commit_graph = read_commit_graph(graph_file)
 
 
         repo = Repo(self.repo_path)
         repo = Repo(self.repo_path)
+        self.addCleanup(repo.close)
 
 
         # Build a map of commit to generation number
         # Build a map of commit to generation number
         generation_map = {}
         generation_map = {}
@@ -415,6 +423,7 @@ class CommitGraphCompatTests(CompatTestCase):
         commit_graph = read_commit_graph(graph_file)
         commit_graph = read_commit_graph(graph_file)
 
 
         repo = Repo(self.repo_path)
         repo = Repo(self.repo_path)
+        self.addCleanup(repo.close)
 
 
         # Verify tagged commits are in the graph
         # Verify tagged commits are in the graph
         tagged_commits = [commits[2], commits[4]]
         tagged_commits = [commits[2], commits[4]]

+ 371 - 0
tests/compat/test_lfs.py

@@ -0,0 +1,371 @@
+#!/usr/bin/python
+# test_lfs.py -- Compatibility tests for LFS.
+# Copyright (C) 2025 Dulwich contributors
+#
+# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
+# General Public License as published by the Free Software Foundation; version 2.0
+# or (at your option) any later version. You can redistribute it and/or
+# modify it under the terms of either of these two licenses.
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# You should have received a copy of the licenses; if not, see
+# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
+# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
+# License, Version 2.0.
+#
+
+"""Compatibility tests for LFS functionality between dulwich and git-lfs."""
+
+import os
+import subprocess
+import tempfile
+from unittest import skipUnless
+
+from dulwich import porcelain
+from dulwich.lfs import LFSPointer
+from dulwich.porcelain import lfs_clean, lfs_init, lfs_smudge, lfs_track
+
+from .utils import CompatTestCase, rmtree_ro, run_git_or_fail
+
+
+def git_lfs_version():
+    """Get git-lfs version tuple."""
+    try:
+        output = run_git_or_fail(["lfs", "version"])
+        # Example output: "git-lfs/3.0.2 (GitHub; linux amd64; go 1.17.2)"
+        version_str = output.split(b"/")[1].split()[0]
+        return tuple(map(int, version_str.decode().split(".")))
+    except (OSError, subprocess.CalledProcessError, AssertionError):
+        return None
+
+
+class LFSCompatTestCase(CompatTestCase):
+    """Base class for LFS compatibility tests."""
+
+    min_git_version = (2, 0, 0)  # git-lfs requires git 2.0+
+
+    def setUp(self):
+        super().setUp()
+        if git_lfs_version() is None:
+            self.skipTest("git-lfs not available")
+
+    def assertPointerEquals(self, pointer1, pointer2):
+        """Assert two LFS pointers are equivalent."""
+        self.assertEqual(pointer1.oid, pointer2.oid)
+        self.assertEqual(pointer1.size, pointer2.size)
+
+    def make_temp_dir(self):
+        """Create a temporary directory that will be cleaned up."""
+        temp_dir = tempfile.mkdtemp()
+        self.addCleanup(rmtree_ro, temp_dir)
+        return temp_dir
+
+
+class LFSInitCompatTest(LFSCompatTestCase):
+    """Tests for LFS initialization compatibility."""
+
+    def test_lfs_init_dulwich(self):
+        """Test that dulwich lfs_init is compatible with git-lfs."""
+        # Initialize with dulwich
+        repo_dir = self.make_temp_dir()
+        run_git_or_fail(["init"], cwd=repo_dir)
+        lfs_init(repo_dir)
+
+        # Verify with git-lfs
+        output = run_git_or_fail(["lfs", "env"], cwd=repo_dir)
+        self.assertIn(b"git config filter.lfs.clean", output)
+        self.assertIn(b"git config filter.lfs.smudge", output)
+
+    def test_lfs_init_git(self):
+        """Test that git-lfs init is compatible with dulwich."""
+        # Initialize with git-lfs
+        repo_dir = self.make_temp_dir()
+        run_git_or_fail(["init"], cwd=repo_dir)
+        run_git_or_fail(["lfs", "install", "--local"], cwd=repo_dir)
+
+        # Verify with dulwich
+        repo = porcelain.open_repo(repo_dir)
+        self.addCleanup(repo.close)
+        config = repo.get_config_stack()
+        self.assertEqual(
+            config.get(("filter", "lfs"), "clean").decode(), "git-lfs clean -- %f"
+        )
+        self.assertEqual(
+            config.get(("filter", "lfs"), "smudge").decode(), "git-lfs smudge -- %f"
+        )
+
+
+class LFSTrackCompatTest(LFSCompatTestCase):
+    """Tests for LFS tracking compatibility."""
+
+    def test_track_dulwich(self):
+        """Test that dulwich lfs_track is compatible with git-lfs."""
+        repo_dir = self.make_temp_dir()
+        run_git_or_fail(["init"], cwd=repo_dir)
+        lfs_init(repo_dir)
+
+        # Track with dulwich
+        lfs_track(repo_dir, ["*.bin", "*.dat"])
+
+        # Verify with git-lfs
+        output = run_git_or_fail(["lfs", "track"], cwd=repo_dir)
+        self.assertIn(b"*.bin", output)
+        self.assertIn(b"*.dat", output)
+
+    def test_track_git(self):
+        """Test that git-lfs track is compatible with dulwich."""
+        repo_dir = self.make_temp_dir()
+        run_git_or_fail(["init"], cwd=repo_dir)
+        run_git_or_fail(["lfs", "install", "--local"], cwd=repo_dir)
+
+        # Track with git-lfs
+        run_git_or_fail(["lfs", "track", "*.bin"], cwd=repo_dir)
+        run_git_or_fail(["lfs", "track", "*.dat"], cwd=repo_dir)
+
+        # Verify with dulwich
+        gitattributes_path = os.path.join(repo_dir, ".gitattributes")
+        with open(gitattributes_path, "rb") as f:
+            content = f.read().decode()
+        self.assertIn("*.bin filter=lfs", content)
+        self.assertIn("*.dat filter=lfs", content)
+
+
+class LFSFileOperationsCompatTest(LFSCompatTestCase):
+    """Tests for LFS file operations compatibility."""
+
+    def test_add_commit_dulwich(self):
+        """Test adding and committing LFS files with dulwich."""
+        repo_dir = self.make_temp_dir()
+        run_git_or_fail(["init"], cwd=repo_dir)
+        lfs_init(repo_dir)
+        lfs_track(repo_dir, ["*.bin"])
+
+        # Create and add a large file
+        test_file = os.path.join(repo_dir, "test.bin")
+        test_content = b"x" * 1024 * 1024  # 1MB
+        with open(test_file, "wb") as f:
+            f.write(test_content)
+
+        # Add with dulwich
+        porcelain.add(repo_dir, [test_file])
+        porcelain.commit(repo_dir, message=b"Add LFS file")
+
+        # Verify with git-lfs
+        output = run_git_or_fail(["lfs", "ls-files"], cwd=repo_dir)
+        self.assertIn(b"test.bin", output)
+
+        # Check pointer file in git
+        output = run_git_or_fail(["show", "HEAD:test.bin"], cwd=repo_dir)
+        self.assertIn(b"version https://git-lfs.github.com/spec/v1", output)
+        self.assertIn(b"oid sha256:", output)
+        self.assertIn(b"size 1048576", output)
+
+    def test_add_commit_git(self):
+        """Test adding and committing LFS files with git-lfs."""
+        repo_dir = self.make_temp_dir()
+        run_git_or_fail(["init"], cwd=repo_dir)
+        run_git_or_fail(["lfs", "install", "--local"], cwd=repo_dir)
+        run_git_or_fail(["lfs", "track", "*.bin"], cwd=repo_dir)
+        run_git_or_fail(["add", ".gitattributes"], cwd=repo_dir)
+        run_git_or_fail(["commit", "-m", "Track .bin files"], cwd=repo_dir)
+
+        # Create and add a large file
+        test_file = os.path.join(repo_dir, "test.bin")
+        test_content = b"y" * 1024 * 1024  # 1MB
+        with open(test_file, "wb") as f:
+            f.write(test_content)
+
+        # Add with git-lfs
+        run_git_or_fail(["add", "test.bin"], cwd=repo_dir)
+        run_git_or_fail(["commit", "-m", "Add LFS file"], cwd=repo_dir)
+
+        # Verify with dulwich
+        repo = porcelain.open_repo(repo_dir)
+        self.addCleanup(repo.close)
+        tree = repo[repo.head()].tree
+        mode, sha = repo.object_store[tree][b"test.bin"]
+        blob = repo.object_store[sha]
+        pointer = LFSPointer.from_bytes(blob.data)
+        self.assertEqual(pointer.size, 1048576)
+
+    def test_checkout_dulwich(self):
+        """Test checking out LFS files with dulwich."""
+        # Create repo with git-lfs
+        repo_dir = self.make_temp_dir()
+        run_git_or_fail(["init"], cwd=repo_dir)
+        run_git_or_fail(["lfs", "install", "--local"], cwd=repo_dir)
+        run_git_or_fail(["lfs", "track", "*.bin"], cwd=repo_dir)
+        run_git_or_fail(["add", ".gitattributes"], cwd=repo_dir)
+        run_git_or_fail(["commit", "-m", "Track .bin files"], cwd=repo_dir)
+
+        # Add LFS file
+        test_file = os.path.join(repo_dir, "test.bin")
+        test_content = b"z" * 1024 * 1024  # 1MB
+        with open(test_file, "wb") as f:
+            f.write(test_content)
+        run_git_or_fail(["add", "test.bin"], cwd=repo_dir)
+        run_git_or_fail(["commit", "-m", "Add LFS file"], cwd=repo_dir)
+
+        # Remove working copy
+        os.remove(test_file)
+
+        # Checkout with dulwich
+        porcelain.reset(repo_dir, mode="hard")
+
+        # Verify file contents
+        with open(test_file, "rb") as f:
+            content = f.read()
+        self.assertEqual(content, test_content)
+
+
+class LFSPointerCompatTest(LFSCompatTestCase):
+    """Tests for LFS pointer file compatibility."""
+
+    def test_pointer_format_dulwich(self):
+        """Test that dulwich creates git-lfs compatible pointers."""
+        repo_dir = self.make_temp_dir()
+        run_git_or_fail(["init"], cwd=repo_dir)
+        lfs_init(repo_dir)
+
+        test_content = b"test content for LFS"
+        test_file = os.path.join(repo_dir, "test.txt")
+        with open(test_file, "wb") as f:
+            f.write(test_content)
+
+        # Create pointer with dulwich
+        pointer_data = lfs_clean(repo_dir, "test.txt")
+
+        # Write the pointer to a file so its on-disk format can be checked
+        test_file = os.path.join(repo_dir, "test_pointer")
+        with open(test_file, "wb") as f:
+            f.write(pointer_data)
+
+        # Verify pointer format
+        with open(test_file, "rb") as f:
+            lines = f.read().decode().strip().split("\n")
+
+        self.assertEqual(lines[0], "version https://git-lfs.github.com/spec/v1")
+        self.assertTrue(lines[1].startswith("oid sha256:"))
+        self.assertTrue(lines[2].startswith("size "))
+
+    def test_pointer_format_git(self):
+        """Test that dulwich can parse git-lfs pointers."""
+        # Create a git-lfs pointer manually
+        oid = "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
+        size = 12345
+        pointer_content = f"version https://git-lfs.github.com/spec/v1\noid sha256:{oid}\nsize {size}\n"
+
+        # Parse with dulwich
+        pointer = LFSPointer.from_bytes(pointer_content.encode())
+
+        self.assertEqual(pointer.oid, oid)
+        self.assertEqual(pointer.size, size)
+
+
+class LFSFilterCompatTest(LFSCompatTestCase):
+    """Tests for LFS filter operations compatibility."""
+
+    def test_clean_filter_compat(self):
+        """Test clean filter compatibility between dulwich and git-lfs."""
+        repo_dir = self.make_temp_dir()
+        run_git_or_fail(["init"], cwd=repo_dir)
+        lfs_init(repo_dir)
+
+        test_content = b"x" * 1000
+        test_file = os.path.join(repo_dir, "test.txt")
+        with open(test_file, "wb") as f:
+            f.write(test_content)
+
+        # Clean with dulwich
+        dulwich_pointer = lfs_clean(repo_dir, "test.txt")
+
+        # git-lfs clean cannot easily be invoked directly here, so instead
+        # verify that the pointer produced by dulwich uses the git-lfs format
+        self.assertIn(b"version https://git-lfs.github.com/spec/v1", dulwich_pointer)
+        self.assertIn(b"oid sha256:", dulwich_pointer)
+        self.assertIn(b"size 1000", dulwich_pointer)
+
+    def test_smudge_filter_compat(self):
+        """Test smudge filter compatibility between dulwich and git-lfs."""
+        # Create a test repo with LFS
+        repo_dir = self.make_temp_dir()
+        run_git_or_fail(["init"], cwd=repo_dir)
+        lfs_init(repo_dir)
+
+        # Create test content
+        test_content = b"test data for smudge filter"
+        test_file = os.path.join(repo_dir, "test.txt")
+        with open(test_file, "wb") as f:
+            f.write(test_content)
+
+        pointer_data = lfs_clean(repo_dir, "test.txt")
+
+        # Store object in LFS
+        lfs_dir = os.path.join(repo_dir, ".git", "lfs")
+        os.makedirs(lfs_dir, exist_ok=True)
+
+        # Parse pointer to get oid
+        pointer = LFSPointer.from_bytes(pointer_data)
+
+        # Store object
+        obj_dir = os.path.join(lfs_dir, "objects", pointer.oid[:2], pointer.oid[2:4])
+        os.makedirs(obj_dir, exist_ok=True)
+        obj_path = os.path.join(obj_dir, pointer.oid)
+        with open(obj_path, "wb") as f:
+            f.write(test_content)
+
+        # Test smudge
+        smudged = lfs_smudge(repo_dir, pointer_data)
+        self.assertEqual(smudged, test_content)
+
+
+class LFSCloneCompatTest(LFSCompatTestCase):
+    """Tests for cloning repositories with LFS files."""
+
+    @skipUnless(
+        git_lfs_version() and git_lfs_version() >= (2, 0, 0),
+        "git-lfs 2.0+ required for clone tests",
+    )
+    def test_clone_with_lfs(self):
+        """Test cloning a repository with LFS files."""
+        # Create source repo with LFS
+        source_dir = self.make_temp_dir()
+        run_git_or_fail(["init"], cwd=source_dir)
+        run_git_or_fail(["lfs", "install", "--local"], cwd=source_dir)
+        run_git_or_fail(["lfs", "track", "*.bin"], cwd=source_dir)
+        run_git_or_fail(["add", ".gitattributes"], cwd=source_dir)
+        run_git_or_fail(["commit", "-m", "Track .bin files"], cwd=source_dir)
+
+        # Add LFS file
+        test_file = os.path.join(source_dir, "test.bin")
+        test_content = b"w" * 1024 * 1024  # 1MB
+        with open(test_file, "wb") as f:
+            f.write(test_content)
+        run_git_or_fail(["add", "test.bin"], cwd=source_dir)
+        run_git_or_fail(["commit", "-m", "Add LFS file"], cwd=source_dir)
+
+        # Clone with dulwich
+        target_dir = self.make_temp_dir()
+        cloned_repo = porcelain.clone(source_dir, target_dir)
+        self.addCleanup(cloned_repo.close)
+
+        # Verify LFS file exists as pointer
+        cloned_file = os.path.join(target_dir, "test.bin")
+        with open(cloned_file, "rb") as f:
+            content = f.read()
+
+        # Should be a pointer, not the full content
+        self.assertLess(len(content), 1000)  # Pointer is much smaller
+        self.assertIn(b"version https://git-lfs.github.com/spec/v1", content)
+
+
+if __name__ == "__main__":
+    import unittest
+
+    unittest.main()
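
For reference, a minimal sketch of the round trip these compatibility tests exercise (illustrative only; the lfs_* helpers and their import path are assumed from the tests above, not a verbatim API reference):

    import os
    import tempfile

    from dulwich import porcelain
    from dulwich.lfs import LFSPointer
    # Assumed import path for the porcelain LFS helpers used in the tests above:
    from dulwich.porcelain import lfs_clean, lfs_init

    repo_dir = tempfile.mkdtemp()
    porcelain.init(repo_dir)
    lfs_init(repo_dir)  # set up .git/lfs and the LFS filter configuration

    with open(os.path.join(repo_dir, "big.bin"), "wb") as f:
        f.write(b"x" * 1024)

    pointer_data = lfs_clean(repo_dir, "big.bin")  # file content -> LFS pointer bytes
    pointer = LFSPointer.from_bytes(pointer_data)  # exposes the sha256 oid and size

    # lfs_smudge(repo_dir, pointer_data) is the inverse: it restores the content,
    # provided the object is stored under .git/lfs/objects/<oid[:2]>/<oid[2:4]>/<oid>
    # as arranged in test_smudge_filter_compat above.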

+ 11 - 0
tests/compat/test_reftable.py

@@ -157,6 +157,7 @@ class ReftableCompatTestCase(CompatTestCase):
 
 
         # Open with Dulwich
         # Open with Dulwich
         repo = Repo(self.test_dir)
         repo = Repo(self.test_dir)
+        self.addCleanup(repo.close)
 
 
         # Verify it's using reftable
         # Verify it's using reftable
         self.assertIsInstance(repo.refs, ReftableRefsContainer)
         self.assertIsInstance(repo.refs, ReftableRefsContainer)
@@ -274,6 +275,7 @@ class ReftableCompatTestCase(CompatTestCase):
 
 
         # Read refs with Dulwich
         # Read refs with Dulwich
         repo = Repo(self.test_dir)
         repo = Repo(self.test_dir)
+        self.addCleanup(repo.close)
         dulwich_refs = repo.get_refs()
         dulwich_refs = repo.get_refs()
 
 
         # Compare non-symbolic refs
         # Compare non-symbolic refs
@@ -300,6 +302,7 @@ class ReftableCompatTestCase(CompatTestCase):
 
 
         # Read with both git and Dulwich
         # Read with both git and Dulwich
         repo = Repo(self.test_dir)
         repo = Repo(self.test_dir)
+        self.addCleanup(repo.close)
         dulwich_refs = repo.get_refs()
         dulwich_refs = repo.get_refs()
 
 
         git_output = self._run_git(["show-ref"])
         git_output = self._run_git(["show-ref"])
@@ -407,6 +410,7 @@ class ReftableCompatTestCase(CompatTestCase):
         ).strip()
         ).strip()
 
 
         repo = Repo(self.test_dir)
         repo = Repo(self.test_dir)
+        self.addCleanup(repo.close)
 
 
         # Test complex batched operations
         # Test complex batched operations
         with repo.refs.batch_update():
         with repo.refs.batch_update():
@@ -478,6 +482,7 @@ class ReftableCompatTestCase(CompatTestCase):
         ).strip()
         ).strip()
 
 
         repo = Repo(self.test_dir)
         repo = Repo(self.test_dir)
+        self.addCleanup(repo.close)
 
 
         # Create multiple refs
         # Create multiple refs
         with repo.refs.batch_update():
         with repo.refs.batch_update():
@@ -529,6 +534,7 @@ class ReftableCompatTestCase(CompatTestCase):
 
 
         # Create many refs efficiently
         # Create many refs efficiently
         repo = Repo(self.test_dir)
         repo = Repo(self.test_dir)
+        self.addCleanup(repo.close)
 
 
         with repo.refs.batch_update():
         with repo.refs.batch_update():
             # Create 50 branches
             # Create 50 branches
@@ -590,6 +596,7 @@ class ReftableCompatTestCase(CompatTestCase):
         ).strip()
         ).strip()
 
 
         repo = Repo(self.test_dir)
         repo = Repo(self.test_dir)
+        self.addCleanup(repo.close)
 
 
         # Create chain of symbolic refs
         # Create chain of symbolic refs
         with repo.refs.batch_update():
         with repo.refs.batch_update():
@@ -633,6 +640,7 @@ class ReftableCompatTestCase(CompatTestCase):
         ).strip()
         ).strip()
 
 
         repo = Repo(self.test_dir)
         repo = Repo(self.test_dir)
+        self.addCleanup(repo.close)
 
 
         # Test refs with special characters and structures
         # Test refs with special characters and structures
         special_refs = [
         special_refs = [
@@ -689,6 +697,7 @@ class ReftableCompatTestCase(CompatTestCase):
             commits.append(commit_sha)
             commits.append(commit_sha)
 
 
         repo = Repo(self.test_dir)
         repo = Repo(self.test_dir)
+        self.addCleanup(repo.close)
 
 
         # Simulate concurrent operations with multiple batch updates
         # Simulate concurrent operations with multiple batch updates
         # First batch: Create initial refs
         # First batch: Create initial refs
@@ -759,6 +768,7 @@ class ReftableCompatTestCase(CompatTestCase):
         ).strip()
         ).strip()
 
 
         repo = Repo(self.test_dir)
         repo = Repo(self.test_dir)
+        self.addCleanup(repo.close)
 
 
         with repo.refs.batch_update():
         with repo.refs.batch_update():
             repo.refs.set_if_equals(b"refs/heads/master", None, commit_sha)
             repo.refs.set_if_equals(b"refs/heads/master", None, commit_sha)
@@ -775,6 +785,7 @@ class ReftableCompatTestCase(CompatTestCase):
 
 
         # Verify dulwich can still read after git modifications
         # Verify dulwich can still read after git modifications
         repo = Repo(self.test_dir)
         repo = Repo(self.test_dir)
+        self.addCleanup(repo.close)
         dulwich_refs = repo.get_refs()
         dulwich_refs = repo.get_refs()
 
 
         # Should be able to read git-modified refs
         # Should be able to read git-modified refs

+ 2 - 0
tests/test_commit_graph.py

@@ -691,6 +691,7 @@ class CommitGraphGenerationTests(unittest.TestCase):
 
 
         # Verify commit graph is loaded by creating new repo instance
         # Verify commit graph is loaded by creating new repo instance
         repo2 = Repo(repo_path)
         repo2 = Repo(repo_path)
+        self.addCleanup(repo2.close)
         repo2.object_store = object_store
         repo2.object_store = object_store
 
 
         # Verify commit graph is available
         # Verify commit graph is available
@@ -780,6 +781,7 @@ class CommitGraphGenerationTests(unittest.TestCase):
 
 
         # Create new repo instance to pick up commit graph
         # Create new repo instance to pick up commit graph
         repo2 = Repo(repo_path)
         repo2 = Repo(repo_path)
+        self.addCleanup(repo2.close)
         repo2.object_store = object_store
         repo2.object_store = object_store
 
 
         # Verify commit graph is loaded
         # Verify commit graph is loaded

+ 170 - 228
tests/test_filters.py

@@ -1,4 +1,4 @@
-# test_filters.py -- tests for filter drivers
+# test_filters.py -- Tests for filters
 # Copyright (C) 2024 Jelmer Vernooij <jelmer@jelmer.uk>
 # Copyright (C) 2024 Jelmer Vernooij <jelmer@jelmer.uk>
 #
 #
 # SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
 # SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
@@ -19,239 +19,181 @@
 # License, Version 2.0.
 # License, Version 2.0.
 #
 #
 
 
-"""Tests for filter drivers support."""
+"""Tests for filters."""
 
 
-import sys
-from unittest import skipIf
+import os
+import tempfile
+import unittest
 
 
-from dulwich.config import ConfigDict
-from dulwich.filters import (
-    FilterBlobNormalizer,
-    FilterRegistry,
-    ProcessFilterDriver,
-    get_filter_for_path,
-)
-from dulwich.objects import Blob
+from dulwich import porcelain
+from dulwich.repo import Repo
 
 
 from . import TestCase
 from . import TestCase
 
 
 
 
-class ProcessFilterDriverTests(TestCase):
-    @skipIf(sys.platform == "win32", "Unix shell commands")
-    def test_clean_filter(self) -> None:
-        """Test clean filter with external command."""
-        # Use a simple command that converts to uppercase
-        driver = ProcessFilterDriver(clean_cmd="tr '[:lower:]' '[:upper:]'")
-        result = driver.clean(b"hello world")
-        self.assertEqual(result, b"HELLO WORLD")
-
-    @skipIf(sys.platform == "win32", "Unix shell commands")
-    def test_smudge_filter(self) -> None:
-        """Test smudge filter with external command."""
-        # Use a simple command that converts to lowercase
-        driver = ProcessFilterDriver(smudge_cmd="tr '[:upper:]' '[:lower:]'")
-        result = driver.smudge(b"HELLO WORLD")
-        self.assertEqual(result, b"hello world")
-
-    def test_no_filters(self) -> None:
-        """Test driver with no filters configured."""
-        driver = ProcessFilterDriver()
-        data = b"test data"
-        self.assertEqual(driver.clean(data), data)
-        self.assertEqual(driver.smudge(data), data)
-
-    @skipIf(sys.platform == "win32", "Unix shell commands")
-    def test_failing_filter(self) -> None:
-        """Test that failing filter propagates the error."""
-        import subprocess
-
-        # Use a command that will fail
-        driver = ProcessFilterDriver(clean_cmd="false")
-        data = b"test data"
-        # Should raise CalledProcessError
-        with self.assertRaises(subprocess.CalledProcessError):
-            driver.clean(data)
-
-        # Test smudge filter too
-        driver = ProcessFilterDriver(smudge_cmd="false")
-        with self.assertRaises(subprocess.CalledProcessError):
-            driver.smudge(data)
-
-
-class FilterRegistryTests(TestCase):
-    def setUp(self) -> None:
-        super().setUp()
-        self.config = ConfigDict()
-        self.registry = FilterRegistry(self.config)
-
-    def test_register_and_get_driver(self) -> None:
-        """Test registering and retrieving a driver."""
-        driver = ProcessFilterDriver(clean_cmd="cat")
-        self.registry.register_driver("test", driver)
-
-        retrieved = self.registry.get_driver("test")
-        self.assertIs(retrieved, driver)
-
-    def test_get_nonexistent_driver(self) -> None:
-        """Test getting a non-existent driver."""
-        result = self.registry.get_driver("nonexistent")
-        self.assertIsNone(result)
-
-    def test_register_factory(self) -> None:
-        """Test registering a driver factory."""
-        created_driver = ProcessFilterDriver(clean_cmd="cat")
-
-        def factory(registry):
-            return created_driver
-
-        self.registry.register_factory("test", factory)
-
-        # Getting driver should invoke factory
-        retrieved = self.registry.get_driver("test")
-        self.assertIs(retrieved, created_driver)
-
-        # Second get should return cached instance
-        retrieved2 = self.registry.get_driver("test")
-        self.assertIs(retrieved2, created_driver)
-
-    def test_create_from_config(self) -> None:
-        """Test creating driver from config."""
-        # Set up config using the proper Config interface
-        self.config.set(("filter", "test"), "clean", b"cat")
-        self.config.set(("filter", "test"), "smudge", b"tac")
-
-        # Get driver (should be created from config)
-        driver = self.registry.get_driver("test")
-        self.assertIsNotNone(driver)
-        self.assertIsInstance(driver, ProcessFilterDriver)
-        self.assertEqual(driver.clean_cmd, "cat")
-        self.assertEqual(driver.smudge_cmd, "tac")
-
-    def test_builtin_lfs_factory(self) -> None:
-        """Test that LFS filter is available as a built-in."""
-        from dulwich.lfs import LFSFilterDriver
-
-        # Should be able to get LFS filter without explicit registration
-        driver = self.registry.get_driver("lfs")
-        self.assertIsNotNone(driver)
-        self.assertIsInstance(driver, LFSFilterDriver)
-
-
-class GetFilterForPathTests(TestCase):
-    def setUp(self) -> None:
-        super().setUp()
-        self.registry = FilterRegistry()
-        self.driver = ProcessFilterDriver(clean_cmd="cat")
-        self.registry.register_driver("test", self.driver)
-
-    def test_get_filter_for_path(self) -> None:
-        """Test getting filter for a path with filter attribute."""
-        gitattributes = {
-            b"*.txt": {b"filter": b"test"},
-        }
-
-        result = get_filter_for_path(b"file.txt", gitattributes, self.registry)
-        self.assertIs(result, self.driver)
-
-    def test_no_filter_attribute(self) -> None:
-        """Test path with no filter attribute."""
-        gitattributes = {
-            b"*.txt": {b"text": b"auto"},
-        }
-
-        result = get_filter_for_path(b"file.txt", gitattributes, self.registry)
-        self.assertIsNone(result)
+class GitAttributesFilterIntegrationTests(TestCase):
+    """Test gitattributes integration with filter drivers."""
 
 
-    def test_no_matching_pattern(self) -> None:
-        """Test path with no matching pattern."""
-        gitattributes = {
-            b"*.jpg": {b"filter": b"test"},
-        }
-
-        result = get_filter_for_path(b"file.txt", gitattributes, self.registry)
-        self.assertIsNone(result)
-
-    def test_filter_not_registered(self) -> None:
-        """Test path with filter that's not registered."""
-        gitattributes = {
-            b"*.txt": {b"filter": b"nonexistent"},
-        }
-
-        result = get_filter_for_path(b"file.txt", gitattributes, self.registry)
-        self.assertIsNone(result)
-
-
-class FilterBlobNormalizerTests(TestCase):
     def setUp(self) -> None:
     def setUp(self) -> None:
         super().setUp()
         super().setUp()
-        self.config = ConfigDict()
-        self.registry = FilterRegistry(self.config)
-        self.gitattributes = {}
-        self.normalizer = FilterBlobNormalizer(
-            self.config, self.gitattributes, self.registry
-        )
-
-    def test_no_filter(self) -> None:
-        """Test normalizer with no filter defined."""
-        blob = Blob()
-        blob.data = b"test content"
-
-        # Both checkin and checkout should return blob unchanged
-        result = self.normalizer.checkin_normalize(blob, b"file.txt")
-        self.assertIs(result, blob)
-
-        result = self.normalizer.checkout_normalize(blob, b"file.txt")
-        self.assertIs(result, blob)
-
-    def test_with_filter(self) -> None:
-        """Test normalizer with a filter defined."""
-
-        # Create a simple filter that converts to uppercase on clean
-        # and lowercase on smudge
-        class TestFilter:
-            def clean(self, data):
-                return data.upper()
-
-            def smudge(self, data):
-                return data.lower()
-
-        # Register the filter and set it in gitattributes
-        self.registry.register_driver("test", TestFilter())
-        self.gitattributes[b"*.txt"] = {b"filter": b"test"}
-
-        blob = Blob()
-        blob.data = b"Test Content"
-
-        # Checkin should uppercase
-        result = self.normalizer.checkin_normalize(blob, b"file.txt")
-        self.assertEqual(result.data, b"TEST CONTENT")
-        self.assertIsNot(result, blob)  # Should be a new blob
-
-        # Checkout should lowercase
-        result = self.normalizer.checkout_normalize(blob, b"file.txt")
-        self.assertEqual(result.data, b"test content")
-        self.assertIsNot(result, blob)  # Should be a new blob
-
-    def test_filter_returns_same_data(self) -> None:
-        """Test that normalizer returns same blob if filter doesn't change data."""
-
-        # Create a filter that returns data unchanged
-        class NoOpFilter:
-            def clean(self, data):
-                return data
-
-            def smudge(self, data):
-                return data
-
-        self.registry.register_driver("noop", NoOpFilter())
-        self.gitattributes[b"*.txt"] = {b"filter": b"noop"}
-
-        blob = Blob()
-        blob.data = b"unchanged content"
-
-        # Both operations should return the same blob instance
-        result = self.normalizer.checkin_normalize(blob, b"file.txt")
-        self.assertIs(result, blob)
-
-        result = self.normalizer.checkout_normalize(blob, b"file.txt")
-        self.assertIs(result, blob)
+        self.test_dir = tempfile.mkdtemp()
+        self.addCleanup(self._cleanup_test_dir)
+        self.repo = Repo.init(self.test_dir)
+
+    def _cleanup_test_dir(self) -> None:
+        """Clean up test directory."""
+        import shutil
+
+        shutil.rmtree(self.test_dir)
+
+    def test_gitattributes_text_filter(self) -> None:
+        """Test that text attribute triggers line ending conversion."""
+        # Configure autocrlf first
+        config = self.repo.get_config()
+        config.set((b"core",), b"autocrlf", b"true")
+        config.write_to_path()
+
+        # Create .gitattributes with text attribute
+        gitattributes_path = os.path.join(self.test_dir, ".gitattributes")
+        with open(gitattributes_path, "wb") as f:
+            f.write(b"*.txt text\n")
+            f.write(b"*.bin -text\n")
+
+        # Add .gitattributes
+        porcelain.add(self.repo, paths=[".gitattributes"])
+        porcelain.commit(self.repo, message=b"Add gitattributes")
+
+        # Create text file with CRLF
+        text_file = os.path.join(self.test_dir, "test.txt")
+        with open(text_file, "wb") as f:
+            f.write(b"line1\r\nline2\r\n")
+
+        # Create binary file with CRLF
+        bin_file = os.path.join(self.test_dir, "test.bin")
+        with open(bin_file, "wb") as f:
+            f.write(b"binary\r\ndata\r\n")
+
+        # Add files
+        porcelain.add(self.repo, paths=["test.txt", "test.bin"])
+
+        # Check that text file was normalized
+        index = self.repo.open_index()
+        text_entry = index[b"test.txt"]
+        text_blob = self.repo.object_store[text_entry.sha]
+        self.assertEqual(text_blob.data, b"line1\nline2\n")
+
+        # Check that binary file was not normalized
+        bin_entry = index[b"test.bin"]
+        bin_blob = self.repo.object_store[bin_entry.sha]
+        self.assertEqual(bin_blob.data, b"binary\r\ndata\r\n")
+
+    @unittest.skip("Custom process filters require external commands")
+    def test_gitattributes_custom_filter(self) -> None:
+        """Test custom filter specified in gitattributes."""
+        # Create .gitattributes with custom filter
+        gitattributes_path = os.path.join(self.test_dir, ".gitattributes")
+        with open(gitattributes_path, "wb") as f:
+            f.write(b"*.secret filter=redact\n")
+
+        # Configure custom filter (use tr command for testing)
+        config = self.repo.get_config()
+        # This filter replaces all digits with X
+        config.set((b"filter", b"redact"), b"clean", b"tr '0-9' 'X'")
+        config.write_to_path()
+
+        # Add .gitattributes
+        porcelain.add(self.repo, paths=[".gitattributes"])
+
+        # Create file with sensitive content
+        secret_file = os.path.join(self.test_dir, "password.secret")
+        with open(secret_file, "wb") as f:
+            f.write(b"password123\ntoken456\n")
+
+        # Add file
+        porcelain.add(self.repo, paths=["password.secret"])
+
+        # Check that content was filtered
+        index = self.repo.open_index()
+        entry = index[b"password.secret"]
+        blob = self.repo.object_store[entry.sha]
+        self.assertEqual(blob.data, b"passwordXXX\ntokenXXX\n")
+
+    def test_gitattributes_from_tree(self) -> None:
+        """Test that gitattributes from tree are used when no working tree exists."""
+        # Create .gitattributes with text attribute
+        gitattributes_path = os.path.join(self.test_dir, ".gitattributes")
+        with open(gitattributes_path, "wb") as f:
+            f.write(b"*.txt text\n")
+
+        # Add and commit .gitattributes
+        porcelain.add(self.repo, paths=[".gitattributes"])
+        porcelain.commit(self.repo, message=b"Add gitattributes")
+
+        # Remove .gitattributes from working tree
+        os.remove(gitattributes_path)
+
+        # Get gitattributes - should still work from tree
+        gitattributes = self.repo.get_gitattributes()
+        attrs = gitattributes.match_path(b"test.txt")
+        self.assertEqual(attrs.get(b"text"), True)
+
+    def test_gitattributes_info_attributes(self) -> None:
+        """Test that .git/info/attributes is read."""
+        # Create info/attributes
+        info_dir = os.path.join(self.repo.controldir(), "info")
+        if not os.path.exists(info_dir):
+            os.makedirs(info_dir)
+        info_attrs_path = os.path.join(info_dir, "attributes")
+        with open(info_attrs_path, "wb") as f:
+            f.write(b"*.log text\n")
+
+        # Get gitattributes
+        gitattributes = self.repo.get_gitattributes()
+        attrs = gitattributes.match_path(b"debug.log")
+        self.assertEqual(attrs.get(b"text"), True)
+
+    @unittest.skip("Custom process filters require external commands")
+    def test_filter_precedence(self) -> None:
+        """Test that filter attribute takes precedence over text attribute."""
+        # Create .gitattributes with both text and filter
+        gitattributes_path = os.path.join(self.test_dir, ".gitattributes")
+        with open(gitattributes_path, "wb") as f:
+            f.write(b"*.txt text filter=custom\n")
+
+        # Configure autocrlf and custom filter
+        config = self.repo.get_config()
+        config.set((b"core",), b"autocrlf", b"true")
+        # This filter converts to uppercase
+        config.set((b"filter", b"custom"), b"clean", b"tr '[:lower:]' '[:upper:]'")
+        config.write_to_path()
+
+        # Add .gitattributes
+        porcelain.add(self.repo, paths=[".gitattributes"])
+
+        # Create text file with lowercase and CRLF
+        text_file = os.path.join(self.test_dir, "test.txt")
+        with open(text_file, "wb") as f:
+            f.write(b"hello\r\nworld\r\n")
+
+        # Add file
+        porcelain.add(self.repo, paths=["test.txt"])
+
+        # Check that custom filter was applied (not just line ending conversion)
+        index = self.repo.open_index()
+        entry = index[b"test.txt"]
+        blob = self.repo.object_store[entry.sha]
+        # Should be uppercase with LF endings
+        self.assertEqual(blob.data, b"HELLO\nWORLD\n")
+
+    def test_blob_normalizer_integration(self) -> None:
+        """Test that get_blob_normalizer returns a FilterBlobNormalizer."""
+        normalizer = self.repo.get_blob_normalizer()
+
+        # Check it's the right type
+        from dulwich.filters import FilterBlobNormalizer
+
+        self.assertIsInstance(normalizer, FilterBlobNormalizer)
+
+        # Check it has access to gitattributes
+        self.assertIsNotNone(normalizer.gitattributes)
+        self.assertIsNotNone(normalizer.filter_registry)
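
The removed unit tests above show the lower-level wiring that get_blob_normalizer() performs; a condensed sketch using only names that appear in this diff (exact signatures may differ):

    from dulwich.attrs import GitAttributes, Pattern
    from dulwich.config import ConfigDict
    from dulwich.filters import FilterBlobNormalizer, FilterRegistry, ProcessFilterDriver
    from dulwich.objects import Blob

    config = ConfigDict()
    registry = FilterRegistry(config)
    # Register a driver that upper-cases content on checkin (clean).
    registry.register_driver(
        "upper", ProcessFilterDriver(clean_cmd="tr '[:lower:]' '[:upper:]'")
    )

    # Map *.txt to that driver, mirroring the GitAttributes usage in this diff.
    gitattributes = GitAttributes([(Pattern(b"*.txt"), {b"filter": b"upper"})])
    normalizer = FilterBlobNormalizer(config, gitattributes, registry)

    blob = Blob()
    blob.data = b"hello\n"
    cleaned = normalizer.checkin_normalize(blob, b"file.txt")  # applies the clean filter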

+ 2 - 0
tests/test_grafts.py

@@ -168,6 +168,7 @@ class GraftsInRepoTests(GraftsInRepositoryBase, TestCase):
         r._put_named_file(os.path.join("info", "grafts"), b"")
         r._put_named_file(os.path.join("info", "grafts"), b"")
 
 
         r = Repo(self._repo_dir)
         r = Repo(self._repo_dir)
+        self.addCleanup(r.close)
         self.assertEqual({}, r._graftpoints)
         self.assertEqual({}, r._graftpoints)
 
 
     def test_init_with_info_grafts(self) -> None:
     def test_init_with_info_grafts(self) -> None:
@@ -178,6 +179,7 @@ class GraftsInRepoTests(GraftsInRepositoryBase, TestCase):
         )
         )
 
 
         r = Repo(self._repo_dir)
         r = Repo(self._repo_dir)
+        self.addCleanup(r.close)
         self.assertEqual({self._shas[-1]: [self._shas[0]]}, r._graftpoints)
         self.assertEqual({self._shas[-1]: [self._shas[0]]}, r._graftpoints)
 
 
 
 

+ 2 - 1
tests/test_index.py

@@ -718,7 +718,8 @@ class BuildIndexTests(TestCase):
             repo.object_store.add_objects([(blob, None), (tree, None)])
             repo.object_store.add_objects([(blob, None), (tree, None)])
 
 
             # Create blob normalizer
             # Create blob normalizer
-            blob_normalizer = BlobNormalizer(config, {})
+            autocrlf = config.get((b"core",), b"autocrlf")
+            blob_normalizer = BlobNormalizer(config, {}, autocrlf=autocrlf)
 
 
             # Build index with normalization
             # Build index with normalization
             build_index_from_tree(
             build_index_from_tree(

+ 693 - 0
tests/test_lfs.py

@@ -21,6 +21,7 @@
 
 
 """Tests for LFS support."""
 """Tests for LFS support."""
 
 
+import json
 import shutil
 import shutil
 import tempfile
 import tempfile
 
 
@@ -200,6 +201,112 @@ class LFSPointerTests(TestCase):
         self.assertFalse(invalid_pointer.is_valid_oid())
         self.assertFalse(invalid_pointer.is_valid_oid())
 
 
 
 
+class LFSIntegrationTests(TestCase):
+    """Integration tests for LFS with Git operations."""
+
+    def setUp(self) -> None:
+        super().setUp()
+        import os
+
+        from dulwich.repo import Repo
+
+        # Create temporary directory for test repo
+        self.test_dir = tempfile.mkdtemp()
+        self.addCleanup(shutil.rmtree, self.test_dir)
+
+        # Initialize repo
+        self.repo = Repo.init(self.test_dir)
+        self.lfs_dir = os.path.join(self.test_dir, ".git", "lfs")
+        self.lfs_store = LFSStore.create(self.lfs_dir)
+
+    def test_lfs_with_gitattributes(self) -> None:
+        """Test LFS integration with .gitattributes."""
+        import os
+
+        # Create .gitattributes file
+        gitattributes_path = os.path.join(self.test_dir, ".gitattributes")
+        with open(gitattributes_path, "wb") as f:
+            f.write(b"*.bin filter=lfs diff=lfs merge=lfs -text\n")
+
+        # Create a binary file
+        bin_path = os.path.join(self.test_dir, "large.bin")
+        large_content = b"Large binary content" * 1000
+        with open(bin_path, "wb") as f:
+            f.write(large_content)
+
+        # Add files to repo
+        self.repo.stage([".gitattributes", "large.bin"])
+
+        # Get the blob for large.bin from the index
+        index = self.repo.open_index()
+        entry = index[b"large.bin"]
+        blob = self.repo.object_store[entry.sha]
+
+        # With LFS configured, the blob should contain an LFS pointer
+        # (Note: This would require actual LFS filter integration in dulwich)
+        # For now, we just verify the structure
+        self.assertIsNotNone(blob)
+
+    def test_lfs_checkout_missing_object(self) -> None:
+        """Test checkout behavior when LFS object is missing."""
+        from dulwich.objects import Blob, Commit, Tree
+
+        # Create an LFS pointer blob
+        pointer = LFSPointer(
+            "0000000000000000000000000000000000000000000000000000000000000000", 1234
+        )
+        blob = Blob()
+        blob.data = pointer.to_bytes()
+        self.repo.object_store.add_object(blob)
+
+        # Create tree with the blob
+        tree = Tree()
+        tree.add(b"missing.bin", 0o100644, blob.id)
+        self.repo.object_store.add_object(tree)
+
+        # Create commit
+        commit = Commit()
+        commit.tree = tree.id
+        commit.message = b"Add missing LFS file"
+        commit.author = commit.committer = b"Test User <test@example.com>"
+        commit.commit_time = commit.author_time = 1234567890
+        commit.commit_timezone = commit.author_timezone = 0
+        self.repo.object_store.add_object(commit)
+
+        # Update HEAD
+        self.repo.refs[b"HEAD"] = commit.id
+
+        # Checkout should leave the pointer file in place when the LFS object
+        # is missing (exercising an actual checkout would need more integration)
+
+    def test_lfs_pointer_detection(self) -> None:
+        """Test detection of LFS pointer files."""
+        # Test various file contents
+        test_cases = [
+            # Valid LFS pointer
+            (
+                b"version https://git-lfs.github.com/spec/v1\n"
+                b"oid sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855\n"
+                b"size 1234\n",
+                True,
+            ),
+            # Regular text file
+            (b"This is a regular text file\n", False),
+            # Binary file
+            (b"\x00\x01\x02\x03\x04", False),
+            # File that starts like pointer but isn't
+            (b"version 1.0\nThis is not an LFS pointer\n", False),
+        ]
+
+        for content, expected_is_pointer in test_cases:
+            pointer = LFSPointer.from_bytes(content)
+            self.assertEqual(
+                pointer is not None,
+                expected_is_pointer,
+                f"Failed for content: {content!r}",
+            )
+
+
 class LFSFilterDriverTests(TestCase):
 class LFSFilterDriverTests(TestCase):
     def setUp(self) -> None:
     def setUp(self) -> None:
         super().setUp()
         super().setUp()
@@ -284,3 +391,589 @@ class LFSFilterDriverTests(TestCase):
 
 
         # Should get back the original content
         # Should get back the original content
         self.assertEqual(restored_content, original_content)
         self.assertEqual(restored_content, original_content)
+
+    def test_clean_empty_file(self) -> None:
+        """Test clean filter on empty file."""
+        content = b""
+        result = self.filter_driver.clean(content)
+
+        # Result should be an LFS pointer
+        pointer = LFSPointer.from_bytes(result)
+        self.assertIsNotNone(pointer)
+        self.assertEqual(pointer.size, 0)
+
+        # Empty content should be stored in LFS
+        with self.lfs_store.open_object(pointer.oid) as f:
+            self.assertEqual(f.read(), content)
+
+    def test_clean_large_file(self) -> None:
+        """Test clean filter on large file."""
+        # Create a large file (1MB)
+        content = b"x" * (1024 * 1024)
+        result = self.filter_driver.clean(content)
+
+        # Result should be an LFS pointer
+        pointer = LFSPointer.from_bytes(result)
+        self.assertIsNotNone(pointer)
+        self.assertEqual(pointer.size, len(content))
+
+        # Content should be stored in LFS
+        with self.lfs_store.open_object(pointer.oid) as f:
+            self.assertEqual(f.read(), content)
+
+    def test_smudge_corrupt_pointer(self) -> None:
+        """Test smudge filter with corrupt pointer data."""
+        # Create corrupt pointer data
+        corrupt_data = (
+            b"version https://git-lfs.github.com/spec/v1\noid sha256:invalid\n"
+        )
+
+        # Smudge should return the data as-is
+        result = self.filter_driver.smudge(corrupt_data)
+        self.assertEqual(result, corrupt_data)
+
+    def test_clean_unicode_content(self) -> None:
+        """Test clean filter with unicode content."""
+        # UTF-8 encoded unicode content
+        content = "Hello 世界 🌍".encode()
+        result = self.filter_driver.clean(content)
+
+        # Result should be an LFS pointer
+        pointer = LFSPointer.from_bytes(result)
+        self.assertIsNotNone(pointer)
+
+        # Content should be preserved exactly
+        with self.lfs_store.open_object(pointer.oid) as f:
+            self.assertEqual(f.read(), content)
+
+
+class LFSStoreEdgeCaseTests(TestCase):
+    """Edge case tests for LFS store."""
+
+    def setUp(self) -> None:
+        super().setUp()
+        self.test_dir = tempfile.mkdtemp()
+        self.addCleanup(shutil.rmtree, self.test_dir)
+        self.lfs = LFSStore.create(self.test_dir)
+
+    def test_concurrent_writes(self) -> None:
+        """Test that concurrent writes to same content work correctly."""
+        content = b"duplicate content"
+
+        # Write the same content multiple times
+        sha1 = self.lfs.write_object([content])
+        sha2 = self.lfs.write_object([content])
+
+        # Should get the same SHA
+        self.assertEqual(sha1, sha2)
+
+        # Content should be stored only once
+        with self.lfs.open_object(sha1) as f:
+            self.assertEqual(f.read(), content)
+
+    def test_write_with_generator(self) -> None:
+        """Test writing object with generator chunks."""
+
+        def chunk_generator():
+            yield b"chunk1"
+            yield b"chunk2"
+            yield b"chunk3"
+
+        sha = self.lfs.write_object(chunk_generator())
+
+        # Verify content
+        with self.lfs.open_object(sha) as f:
+            self.assertEqual(f.read(), b"chunk1chunk2chunk3")
+
+    def test_partial_write_rollback(self) -> None:
+        """Test that partial writes don't leave artifacts."""
+        import os
+
+        # Count initial objects
+        objects_dir = os.path.join(self.test_dir, "objects")
+        initial_count = sum(len(files) for _, _, files in os.walk(objects_dir))
+
+        # Try to write with a failing generator
+        def failing_generator():
+            yield b"chunk1"
+            raise RuntimeError("Simulated error")
+
+        # This should fail
+        with self.assertRaises(RuntimeError):
+            self.lfs.write_object(failing_generator())
+
+        # No new objects should have been created
+        final_count = sum(len(files) for _, _, files in os.walk(objects_dir))
+        self.assertEqual(initial_count, final_count)
+
+
+class LFSPointerEdgeCaseTests(TestCase):
+    """Edge case tests for LFS pointer parsing."""
+
+    def test_pointer_with_windows_line_endings(self) -> None:
+        """Test parsing pointer with Windows line endings."""
+        pointer_data = (
+            b"version https://git-lfs.github.com/spec/v1\r\n"
+            b"oid sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855\r\n"
+            b"size 1234\r\n"
+        )
+        pointer = LFSPointer.from_bytes(pointer_data)
+        self.assertIsNotNone(pointer)
+        self.assertEqual(pointer.size, 1234)
+
+    def test_pointer_with_extra_whitespace(self) -> None:
+        """Test parsing pointer with extra whitespace."""
+        pointer_data = (
+            b"version https://git-lfs.github.com/spec/v1  \n"
+            b"oid sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855\n"
+            b"size 1234   \n"
+        )
+        pointer = LFSPointer.from_bytes(pointer_data)
+        self.assertIsNotNone(pointer)
+        self.assertEqual(pointer.size, 1234)
+
+    def test_pointer_case_sensitivity(self) -> None:
+        """Test that pointer parsing is case sensitive."""
+        # Version line must be exact
+        pointer_data = (
+            b"Version https://git-lfs.github.com/spec/v1\n"  # Capital V
+            b"oid sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855\n"
+            b"size 1234\n"
+        )
+        pointer = LFSPointer.from_bytes(pointer_data)
+        self.assertIsNone(pointer)  # Should fail due to case
+
+    def test_pointer_oid_formats(self) -> None:
+        """Test different OID formats."""
+        # SHA256 is currently the only supported format
+        # Test SHA1 format (should fail)
+        pointer_data = (
+            b"version https://git-lfs.github.com/spec/v1\n"
+            b"oid sha1:356a192b7913b04c54574d18c28d46e6395428ab\n"  # SHA1
+            b"size 1234\n"
+        )
+        pointer = LFSPointer.from_bytes(pointer_data)
+        # This might be accepted but marked as invalid OID
+        if pointer:
+            self.assertFalse(pointer.is_valid_oid())
+
+    def test_pointer_size_limits(self) -> None:
+        """Test size value limits."""
+        # Test with very large size
+        pointer_data = (
+            b"version https://git-lfs.github.com/spec/v1\n"
+            b"oid sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855\n"
+            b"size 999999999999999999\n"  # Very large number
+        )
+        pointer = LFSPointer.from_bytes(pointer_data)
+        self.assertIsNotNone(pointer)
+        self.assertEqual(pointer.size, 999999999999999999)
+
+        # Test with negative size (should fail)
+        pointer_data = (
+            b"version https://git-lfs.github.com/spec/v1\n"
+            b"oid sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855\n"
+            b"size -1\n"
+        )
+        pointer = LFSPointer.from_bytes(pointer_data)
+        self.assertIsNone(pointer)  # Should fail with negative size
+
+
+class LFSServerTests(TestCase):
+    """Tests for the LFS server implementation."""
+
+    def setUp(self) -> None:
+        super().setUp()
+        import threading
+
+        from dulwich.lfs_server import run_lfs_server
+
+        # Create temporary directory for LFS storage
+        self.test_dir = tempfile.mkdtemp()
+        self.addCleanup(shutil.rmtree, self.test_dir)
+
+        # Start LFS server
+        self.server, self.server_url = run_lfs_server(port=0, lfs_dir=self.test_dir)
+        self.server_thread = threading.Thread(target=self.server.serve_forever)
+        self.server_thread.daemon = True
+        self.server_thread.start()
+        self.addCleanup(self.server.shutdown)
+
+    def test_server_batch_endpoint(self) -> None:
+        """Test the batch endpoint directly."""
+        from urllib.request import Request, urlopen
+
+        # Create batch request
+        batch_data = {
+            "operation": "download",
+            "transfers": ["basic"],
+            "objects": [{"oid": "abc123", "size": 100}],
+        }
+
+        req = Request(
+            f"{self.server_url}/objects/batch",
+            data=json.dumps(batch_data).encode("utf-8"),
+            headers={
+                "Content-Type": "application/vnd.git-lfs+json",
+                "Accept": "application/vnd.git-lfs+json",
+            },
+            method="POST",
+        )
+
+        with urlopen(req) as response:
+            result = json.loads(response.read())
+
+        self.assertIn("objects", result)
+        self.assertEqual(len(result["objects"]), 1)
+        self.assertEqual(result["objects"][0]["oid"], "abc123")
+        self.assertIn("error", result["objects"][0])  # Object doesn't exist
+
+    def test_server_upload_download(self) -> None:
+        """Test uploading and downloading an object."""
+        import hashlib
+        from urllib.request import Request, urlopen
+
+        test_content = b"test server content"
+        test_oid = hashlib.sha256(test_content).hexdigest()
+
+        # Get upload URL via batch
+        batch_data = {
+            "operation": "upload",
+            "transfers": ["basic"],
+            "objects": [{"oid": test_oid, "size": len(test_content)}],
+        }
+
+        req = Request(
+            f"{self.server_url}/objects/batch",
+            data=json.dumps(batch_data).encode("utf-8"),
+            headers={
+                "Content-Type": "application/vnd.git-lfs+json",
+                "Accept": "application/vnd.git-lfs+json",
+            },
+            method="POST",
+        )
+
+        with urlopen(req) as response:
+            batch_result = json.loads(response.read())
+
+        upload_url = batch_result["objects"][0]["actions"]["upload"]["href"]
+
+        # Upload the object
+        upload_req = Request(
+            upload_url,
+            data=test_content,
+            headers={"Content-Type": "application/octet-stream"},
+            method="PUT",
+        )
+
+        with urlopen(upload_req) as response:
+            self.assertEqual(response.status, 200)
+
+        # Get download URL via batch
+        download_batch_data = {
+            "operation": "download",
+            "transfers": ["basic"],
+            "objects": [{"oid": test_oid, "size": len(test_content)}],
+        }
+
+        req = Request(
+            f"{self.server_url}/objects/batch",
+            data=json.dumps(download_batch_data).encode("utf-8"),
+            headers={
+                "Content-Type": "application/vnd.git-lfs+json",
+                "Accept": "application/vnd.git-lfs+json",
+            },
+            method="POST",
+        )
+
+        with urlopen(req) as response:
+            download_batch_result = json.loads(response.read())
+
+        download_url = download_batch_result["objects"][0]["actions"]["download"][
+            "href"
+        ]
+
+        # Download the object
+        download_req = Request(download_url)
+
+        with urlopen(download_req) as response:
+            downloaded_content = response.read()
+
+        self.assertEqual(downloaded_content, test_content)
+
+    def test_server_verify_endpoint(self) -> None:
+        """Test the verify endpoint."""
+        import hashlib
+        from urllib.error import HTTPError
+        from urllib.request import Request, urlopen
+
+        test_content = b"verify test"
+        test_oid = hashlib.sha256(test_content).hexdigest()
+
+        # First upload the object
+        self.server.lfs_store.write_object([test_content])
+
+        # Test verify for existing object
+        verify_req = Request(
+            f"{self.server_url}/objects/{test_oid}/verify",
+            data=json.dumps({"oid": test_oid, "size": len(test_content)}).encode(
+                "utf-8"
+            ),
+            headers={"Content-Type": "application/vnd.git-lfs+json"},
+            method="POST",
+        )
+
+        with urlopen(verify_req) as response:
+            self.assertEqual(response.status, 200)
+
+        # Test verify for non-existent object
+        fake_oid = "0" * 64
+        verify_req = Request(
+            f"{self.server_url}/objects/{fake_oid}/verify",
+            data=json.dumps({"oid": fake_oid, "size": 100}).encode("utf-8"),
+            headers={"Content-Type": "application/vnd.git-lfs+json"},
+            method="POST",
+        )
+
+        with self.assertRaises(HTTPError) as cm:
+            with urlopen(verify_req):
+                pass
+        self.assertEqual(cm.exception.code, 404)
+
+    def test_server_invalid_endpoints(self) -> None:
+        """Test invalid endpoints return 404."""
+        from urllib.error import HTTPError
+        from urllib.request import Request, urlopen
+
+        # Test invalid GET endpoint
+        with self.assertRaises(HTTPError) as cm:
+            with urlopen(f"{self.server_url}/invalid"):
+                pass
+        self.assertEqual(cm.exception.code, 404)
+
+        # Test invalid POST endpoint
+        req = Request(f"{self.server_url}/invalid", data=b"test", method="POST")
+
+        with self.assertRaises(HTTPError) as cm:
+            with urlopen(req):
+                pass
+        self.assertEqual(cm.exception.code, 404)
+
+    def test_server_batch_invalid_operation(self) -> None:
+        """Test batch endpoint with invalid operation."""
+        from urllib.error import HTTPError
+        from urllib.request import Request, urlopen
+
+        batch_data = {"operation": "invalid", "transfers": ["basic"], "objects": []}
+
+        req = Request(
+            f"{self.server_url}/objects/batch",
+            data=json.dumps(batch_data).encode("utf-8"),
+            headers={"Content-Type": "application/vnd.git-lfs+json"},
+            method="POST",
+        )
+
+        with self.assertRaises(HTTPError) as cm:
+            with urlopen(req):
+                pass
+        self.assertEqual(cm.exception.code, 400)
+
+    def test_server_batch_missing_fields(self) -> None:
+        """Test batch endpoint with missing required fields."""
+        from urllib.request import Request, urlopen
+
+        # Missing oid
+        batch_data = {
+            "operation": "download",
+            "transfers": ["basic"],
+            "objects": [{"size": 100}],  # Missing oid
+        }
+
+        req = Request(
+            f"{self.server_url}/objects/batch",
+            data=json.dumps(batch_data).encode("utf-8"),
+            headers={"Content-Type": "application/vnd.git-lfs+json"},
+            method="POST",
+        )
+
+        with urlopen(req) as response:
+            result = json.loads(response.read())
+
+        self.assertIn("error", result["objects"][0])
+        self.assertIn("Missing oid", result["objects"][0]["error"]["message"])
+
+    def test_server_upload_oid_mismatch(self) -> None:
+        """Test upload with OID mismatch."""
+        from urllib.error import HTTPError
+        from urllib.request import Request, urlopen
+
+        # Upload with wrong OID
+        upload_req = Request(
+            f"{self.server_url}/objects/wrongoid123",
+            data=b"test content",
+            headers={"Content-Type": "application/octet-stream"},
+            method="PUT",
+        )
+
+        with self.assertRaises(HTTPError) as cm:
+            with urlopen(upload_req):
+                pass
+        self.assertEqual(cm.exception.code, 400)
+        self.assertIn("OID mismatch", cm.exception.read().decode())
+
+    def test_server_download_non_existent(self) -> None:
+        """Test downloading non-existent object."""
+        from urllib.error import HTTPError
+        from urllib.request import urlopen
+
+        fake_oid = "0" * 64
+
+        with self.assertRaises(HTTPError) as cm:
+            with urlopen(f"{self.server_url}/objects/{fake_oid}"):
+                pass
+        self.assertEqual(cm.exception.code, 404)
+
+    def test_server_invalid_json(self) -> None:
+        """Test batch endpoint with invalid JSON."""
+        from urllib.error import HTTPError
+        from urllib.request import Request, urlopen
+
+        req = Request(
+            f"{self.server_url}/objects/batch",
+            data=b"not json",
+            headers={"Content-Type": "application/vnd.git-lfs+json"},
+            method="POST",
+        )
+
+        with self.assertRaises(HTTPError) as cm:
+            with urlopen(req):
+                pass
+        self.assertEqual(cm.exception.code, 400)
+
+
+class LFSClientTests(TestCase):
+    """Tests for LFS client network operations."""
+
+    def setUp(self) -> None:
+        super().setUp()
+        import threading
+
+        from dulwich.lfs import LFSClient
+        from dulwich.lfs_server import run_lfs_server
+
+        # Create temporary directory for LFS storage
+        self.test_dir = tempfile.mkdtemp()
+        self.addCleanup(shutil.rmtree, self.test_dir)
+
+        # Start LFS server in a thread
+        self.server, self.server_url = run_lfs_server(port=0, lfs_dir=self.test_dir)
+        self.server_thread = threading.Thread(target=self.server.serve_forever)
+        self.server_thread.daemon = True
+        self.server_thread.start()
+        self.addCleanup(self.server.shutdown)
+
+        # Create LFS client pointing to our test server
+        self.client = LFSClient(f"{self.server_url}/objects")
+
+    def test_client_url_normalization(self) -> None:
+        """Test that client URL is normalized correctly."""
+        from dulwich.lfs import LFSClient
+
+        # Test with trailing slash
+        client = LFSClient("https://example.com/repo.git/info/lfs/")
+        self.assertEqual(client.url, "https://example.com/repo.git/info/lfs")
+
+        # Test without trailing slash
+        client = LFSClient("https://example.com/repo.git/info/lfs")
+        self.assertEqual(client.url, "https://example.com/repo.git/info/lfs")
+
+    def test_batch_request_format(self) -> None:
+        """Test batch request formatting."""
+        # Create an object in the store
+        test_content = b"test content for batch"
+        sha = self.server.lfs_store.write_object([test_content])
+
+        # Request download batch
+        result = self.client.batch(
+            "download", [{"oid": sha, "size": len(test_content)}]
+        )
+
+        self.assertIsNotNone(result.objects)
+        self.assertEqual(len(result.objects), 1)
+        self.assertEqual(result.objects[0].oid, sha)
+        self.assertIsNotNone(result.objects[0].actions)
+        self.assertIn("download", result.objects[0].actions)
+
+    def test_download_with_verification(self) -> None:
+        """Test download with size and hash verification."""
+        import hashlib
+
+        from dulwich.lfs import LFSError
+
+        test_content = b"test content for download"
+        test_oid = hashlib.sha256(test_content).hexdigest()
+
+        # Store the object
+        sha = self.server.lfs_store.write_object([test_content])
+        self.assertEqual(sha, test_oid)  # Verify SHA calculation
+
+        # Download the object
+        content = self.client.download(test_oid, len(test_content))
+        self.assertEqual(content, test_content)
+
+        # Test size mismatch
+        with self.assertRaises(LFSError) as cm:
+            self.client.download(test_oid, 999)  # Wrong size
+        self.assertIn("size", str(cm.exception))
+
+    def test_upload_with_verify(self) -> None:
+        """Test upload with verification step."""
+        import hashlib
+
+        test_content = b"upload test content"
+        test_oid = hashlib.sha256(test_content).hexdigest()
+        test_size = len(test_content)
+
+        # Upload the object
+        self.client.upload(test_oid, test_size, test_content)
+
+        # Verify it was stored
+        with self.server.lfs_store.open_object(test_oid) as f:
+            stored_content = f.read()
+        self.assertEqual(stored_content, test_content)
+
+    def test_upload_already_exists(self) -> None:
+        """Test upload when object already exists on server."""
+        import hashlib
+
+        test_content = b"existing content"
+        test_oid = hashlib.sha256(test_content).hexdigest()
+
+        # Pre-store the object
+        self.server.lfs_store.write_object([test_content])
+
+        # Upload again - should not raise an error
+        self.client.upload(test_oid, len(test_content), test_content)
+
+        # Verify it's still there
+        with self.server.lfs_store.open_object(test_oid) as f:
+            self.assertEqual(f.read(), test_content)
+
+    def test_error_handling(self) -> None:
+        """Test error handling for various scenarios."""
+        from urllib.error import HTTPError
+
+        from dulwich.lfs import LFSError
+
+        # Test downloading non-existent object
+        with self.assertRaises(LFSError) as cm:
+            self.client.download(
+                "0000000000000000000000000000000000000000000000000000000000000000", 100
+            )
+        self.assertIn("Object not found", str(cm.exception))
+
+        # Test uploading with wrong OID
+        with self.assertRaises(HTTPError) as cm:
+            self.client.upload("wrong_oid", 5, b"hello")
+        # Server should reject due to OID mismatch
+        self.assertIn("OID mismatch", str(cm.exception))

+ 6 - 3
tests/test_lfs_integration.py

@@ -49,9 +49,12 @@ class LFSFilterIntegrationTests(TestCase):
         self.registry.register_driver("lfs", self.lfs_filter)
         self.registry.register_driver("lfs", self.lfs_filter)
 
 
         # Set up gitattributes to use LFS for .bin files
         # Set up gitattributes to use LFS for .bin files
-        self.gitattributes = {
-            b"*.bin": {b"filter": b"lfs"},
-        }
+        from dulwich.attrs import GitAttributes, Pattern
+
+        patterns = [
+            (Pattern(b"*.bin"), {b"filter": b"lfs"}),
+        ]
+        self.gitattributes = GitAttributes(patterns)
 
 
         self.normalizer = FilterBlobNormalizer(
         self.normalizer = FilterBlobNormalizer(
             self.config, self.gitattributes, self.registry
             self.config, self.gitattributes, self.registry
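
The switch from a plain dict to GitAttributes matches the lookup API used elsewhere in this diff; as a small sketch:

    from dulwich.attrs import GitAttributes, Pattern

    gitattributes = GitAttributes([(Pattern(b"*.bin"), {b"filter": b"lfs"})])
    attrs = gitattributes.match_path(b"blob.bin")
    assert attrs.get(b"filter") == b"lfs"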

+ 279 - 20
tests/test_line_ending.py

@@ -22,10 +22,12 @@
 """Tests for the line ending conversion."""
 """Tests for the line ending conversion."""
 
 
 from dulwich.line_ending import (
 from dulwich.line_ending import (
+    BlobNormalizer,
+    LineEndingFilter,
     convert_crlf_to_lf,
     convert_crlf_to_lf,
     convert_lf_to_crlf,
     convert_lf_to_crlf,
-    get_checkin_filter_autocrlf,
-    get_checkout_filter_autocrlf,
+    get_clean_filter_autocrlf,
+    get_smudge_filter_autocrlf,
     normalize_blob,
     normalize_blob,
 )
 )
 from dulwich.objects import Blob
 from dulwich.objects import Blob
@@ -56,35 +58,35 @@ class LineEndingConversion(TestCase):
 
 
 class GetLineEndingAutocrlfFilters(TestCase):
-    def test_get_checkin_filter_autocrlf_default(self) -> None:
-        checkin_filter = get_checkin_filter_autocrlf(b"false")
+    def test_get_clean_filter_autocrlf_default(self) -> None:
+        clean_filter = get_clean_filter_autocrlf(b"false")
 
-        self.assertEqual(checkin_filter, None)
+        self.assertEqual(clean_filter, None)
 
-    def test_get_checkin_filter_autocrlf_true(self) -> None:
-        checkin_filter = get_checkin_filter_autocrlf(b"true")
+    def test_get_clean_filter_autocrlf_true(self) -> None:
+        clean_filter = get_clean_filter_autocrlf(b"true")
 
-        self.assertEqual(checkin_filter, convert_crlf_to_lf)
+        self.assertEqual(clean_filter, convert_crlf_to_lf)
 
-    def test_get_checkin_filter_autocrlf_input(self) -> None:
-        checkin_filter = get_checkin_filter_autocrlf(b"input")
+    def test_get_clean_filter_autocrlf_input(self) -> None:
+        clean_filter = get_clean_filter_autocrlf(b"input")
 
-        self.assertEqual(checkin_filter, convert_crlf_to_lf)
+        self.assertEqual(clean_filter, convert_crlf_to_lf)
 
-    def test_get_checkout_filter_autocrlf_default(self) -> None:
-        checkout_filter = get_checkout_filter_autocrlf(b"false")
+    def test_get_smudge_filter_autocrlf_default(self) -> None:
+        smudge_filter = get_smudge_filter_autocrlf(b"false")
 
-        self.assertEqual(checkout_filter, None)
+        self.assertEqual(smudge_filter, None)
 
-    def test_get_checkout_filter_autocrlf_true(self) -> None:
-        checkout_filter = get_checkout_filter_autocrlf(b"true")
+    def test_get_smudge_filter_autocrlf_true(self) -> None:
+        smudge_filter = get_smudge_filter_autocrlf(b"true")
 
-        self.assertEqual(checkout_filter, convert_lf_to_crlf)
+        self.assertEqual(smudge_filter, convert_lf_to_crlf)
 
-    def test_get_checkout_filter_autocrlf_input(self) -> None:
-        checkout_filter = get_checkout_filter_autocrlf(b"input")
+    def test_get_smudge_filter_autocrlf_input(self) -> None:
+        smudge_filter = get_smudge_filter_autocrlf(b"input")
 
-        self.assertEqual(checkout_filter, None)
+        self.assertEqual(smudge_filter, None)
 
 
 class NormalizeBlobTestCase(TestCase):
@@ -195,3 +197,260 @@ class NormalizeBlobTestCase(TestCase):
 
         self.assertEqual(filtered_blob.as_raw_chunks(), [base_content])
         self.assertEqual(filtered_blob.sha().hexdigest(), base_sha)
+
+
+class LineEndingFilterTests(TestCase):
+    """Test the LineEndingFilter class."""
+
+    def test_clean_no_conversion(self) -> None:
+        """Test clean with no conversion function."""
+        filter = LineEndingFilter()
+        data = b"test\r\ndata"
+        self.assertEqual(filter.clean(data), data)
+
+    def test_clean_with_conversion(self) -> None:
+        """Test clean with CRLF to LF conversion."""
+        filter = LineEndingFilter(clean_conversion=convert_crlf_to_lf)
+        data = b"test\r\ndata"
+        self.assertEqual(filter.clean(data), b"test\ndata")
+
+    def test_clean_binary_detection(self) -> None:
+        """Test clean skips binary files."""
+        filter = LineEndingFilter(
+            clean_conversion=convert_crlf_to_lf, binary_detection=True
+        )
+        # Binary data with null byte
+        data = b"test\r\n\x00data"
+        self.assertEqual(filter.clean(data), data)  # Should not convert
+
+    def test_smudge_no_conversion(self) -> None:
+        """Test smudge with no conversion function."""
+        filter = LineEndingFilter()
+        data = b"test\ndata"
+        self.assertEqual(filter.smudge(data), data)
+
+    def test_smudge_with_conversion(self) -> None:
+        """Test smudge with LF to CRLF conversion."""
+        filter = LineEndingFilter(smudge_conversion=convert_lf_to_crlf)
+        data = b"test\ndata"
+        self.assertEqual(filter.smudge(data), b"test\r\ndata")
+
+    def test_smudge_binary_detection(self) -> None:
+        """Test smudge skips binary files."""
+        filter = LineEndingFilter(
+            smudge_conversion=convert_lf_to_crlf, binary_detection=True
+        )
+        # Binary data with null byte
+        data = b"test\n\x00data"
+        self.assertEqual(filter.smudge(data), data)  # Should not convert
+
+
+class BlobNormalizerTests(TestCase):
+    """Test the BlobNormalizer class integration with filters."""
+
+    def setUp(self) -> None:
+        super().setUp()
+        from dulwich.config import ConfigDict
+
+        self.config = ConfigDict()
+        self.gitattributes = {}
+
+    def test_autocrlf_true_checkin(self) -> None:
+        """Test checkin with autocrlf=true."""
+        normalizer = BlobNormalizer(self.config, self.gitattributes, autocrlf=b"true")
+
+        # Create blob with CRLF
+        blob = Blob()
+        blob.data = b"line1\r\nline2\r\n"
+
+        # Should convert to LF on checkin
+        result = normalizer.checkin_normalize(blob, b"test.txt")
+        self.assertEqual(result.data, b"line1\nline2\n")
+
+    def test_autocrlf_true_checkout(self) -> None:
+        """Test checkout with autocrlf=true."""
+        normalizer = BlobNormalizer(self.config, self.gitattributes, autocrlf=b"true")
+
+        # Create blob with LF
+        blob = Blob()
+        blob.data = b"line1\nline2\n"
+
+        # Should convert to CRLF on checkout
+        result = normalizer.checkout_normalize(blob, b"test.txt")
+        self.assertEqual(result.data, b"line1\r\nline2\r\n")
+
+    def test_autocrlf_input_checkin(self) -> None:
+        """Test checkin with autocrlf=input."""
+        normalizer = BlobNormalizer(self.config, self.gitattributes, autocrlf=b"input")
+
+        # Create blob with CRLF
+        blob = Blob()
+        blob.data = b"line1\r\nline2\r\n"
+
+        # Should convert to LF on checkin
+        result = normalizer.checkin_normalize(blob, b"test.txt")
+        self.assertEqual(result.data, b"line1\nline2\n")
+
+    def test_autocrlf_input_checkout(self) -> None:
+        """Test checkout with autocrlf=input."""
+        normalizer = BlobNormalizer(self.config, self.gitattributes, autocrlf=b"input")
+
+        # Create blob with LF
+        blob = Blob()
+        blob.data = b"line1\nline2\n"
+
+        # Should NOT convert on checkout with input mode
+        result = normalizer.checkout_normalize(blob, b"test.txt")
+        self.assertIs(result, blob)  # Same object, no conversion
+
+    def test_autocrlf_false(self) -> None:
+        """Test with autocrlf=false (no conversion)."""
+        normalizer = BlobNormalizer(self.config, self.gitattributes, autocrlf=b"false")
+
+        # Create blob with mixed line endings
+        blob = Blob()
+        blob.data = b"line1\r\nline2\nline3"
+
+        # Should not convert on either operation
+        result = normalizer.checkin_normalize(blob, b"test.txt")
+        self.assertIs(result, blob)
+
+        result = normalizer.checkout_normalize(blob, b"test.txt")
+        self.assertIs(result, blob)
+
+    def test_gitattributes_text_attr(self) -> None:
+        """Test gitattributes text attribute overrides autocrlf."""
+        # Set gitattributes to force text conversion
+        self.gitattributes[b"*.txt"] = {b"text": True}
+
+        # Even with autocrlf=false, should convert based on gitattributes
+        normalizer = BlobNormalizer(self.config, self.gitattributes, autocrlf=b"false")
+
+        blob = Blob()
+        blob.data = b"line1\r\nline2\r\n"
+
+        # Should still convert because of gitattributes
+        result = normalizer.checkin_normalize(blob, b"test.txt")
+        # Note: with just text=true and no eol setting, it follows platform defaults
+        # For checkin, it should always normalize to LF
+        self.assertIsNot(result, blob)
+
+    def test_gitattributes_binary_attr(self) -> None:
+        """Test gitattributes -text attribute prevents conversion."""
+        # Set gitattributes to force binary (no conversion)
+        self.gitattributes[b"*.bin"] = {b"text": False}
+
+        normalizer = BlobNormalizer(self.config, self.gitattributes, autocrlf=b"true")
+
+        blob = Blob()
+        blob.data = b"line1\r\nline2\r\n"
+
+        # Should not convert despite autocrlf=true
+        result = normalizer.checkin_normalize(blob, b"test.bin")
+        self.assertIs(result, blob)
+
+    def test_binary_file_detection(self) -> None:
+        """Test that binary files are not converted."""
+        normalizer = BlobNormalizer(self.config, self.gitattributes, autocrlf=b"true")
+
+        # Create blob with binary content
+        blob = Blob()
+        blob.data = b"line1\r\n\x00\xffbinary\r\ndata"
+
+        # Should not convert binary files
+        result = normalizer.checkin_normalize(blob, b"binary.dat")
+        self.assertIs(result, blob)
+
+        result = normalizer.checkout_normalize(blob, b"binary.dat")
+        self.assertIs(result, blob)
+
+
+class LineEndingIntegrationTests(TestCase):
+    """Integration tests for line ending conversion with the filter system."""
+
+    def setUp(self) -> None:
+        super().setUp()
+        from dulwich.config import ConfigDict
+        from dulwich.filters import FilterRegistry
+
+        self.config = ConfigDict()
+        self.registry = FilterRegistry(self.config)
+
+    def test_filter_registry_with_line_endings(self) -> None:
+        """Test that line ending filters work through the registry."""
+        # Register a custom text filter that does line ending conversion
+        filter = LineEndingFilter(
+            clean_conversion=convert_crlf_to_lf,
+            smudge_conversion=convert_lf_to_crlf,
+            binary_detection=True,
+        )
+        self.registry.register_driver("text", filter)
+
+        # Set up gitattributes
+        # Create GitAttributes
+        from dulwich.attrs import GitAttributes, Pattern
+
+        patterns = [(Pattern(b"*.txt"), {b"filter": b"text"})]
+        gitattributes = GitAttributes(patterns)
+
+        # Create normalizer
+        from dulwich.filters import FilterBlobNormalizer
+
+        normalizer = FilterBlobNormalizer(self.config, gitattributes, self.registry)
+
+        # Test round trip
+        blob = Blob()
+        blob.data = b"Hello\r\nWorld\r\n"
+
+        # Checkin should convert CRLF to LF
+        checked_in = normalizer.checkin_normalize(blob, b"test.txt")
+        self.assertEqual(checked_in.data, b"Hello\nWorld\n")
+
+        # Checkout should convert LF to CRLF
+        checked_out = normalizer.checkout_normalize(checked_in, b"test.txt")
+        self.assertEqual(checked_out.data, b"Hello\r\nWorld\r\n")
+
+    def test_mixed_filters(self) -> None:
+        """Test multiple filters can coexist (line endings and LFS)."""
+        # This would be a more complex test requiring LFS setup
+        # For now, just verify the structure works
+        text_filter = LineEndingFilter(
+            clean_conversion=convert_crlf_to_lf,
+            smudge_conversion=convert_lf_to_crlf,
+        )
+        self.registry.register_driver("text", text_filter)
+
+        # Mock LFS filter
+        class MockLFSFilter:
+            def clean(self, data):
+                return b"LFS pointer"
+
+            def smudge(self, data):
+                return b"LFS content"
+
+        self.registry.register_driver("lfs", MockLFSFilter())
+
+        # Different files use different filters
+        from dulwich.attrs import GitAttributes, Pattern
+
+        patterns = [
+            (Pattern(b"*.txt"), {b"filter": b"text"}),
+            (Pattern(b"*.bin"), {b"filter": b"lfs"}),
+        ]
+        gitattributes = GitAttributes(patterns)
+
+        from dulwich.filters import FilterBlobNormalizer
+
+        normalizer = FilterBlobNormalizer(self.config, gitattributes, self.registry)
+
+        # Text file gets line ending conversion
+        text_blob = Blob()
+        text_blob.data = b"text\r\nfile"
+        result = normalizer.checkin_normalize(text_blob, b"test.txt")
+        self.assertEqual(result.data, b"text\nfile")
+
+        # Binary file gets LFS conversion
+        bin_blob = Blob()
+        bin_blob.data = b"binary content"
+        result = normalizer.checkin_normalize(bin_blob, b"test.bin")
+        self.assertEqual(result.data, b"LFS pointer")

+ 4 - 1
tests/test_porcelain.py

@@ -918,6 +918,7 @@ class CloneTests(PorcelainTestCase):
         self.addCleanup(r.close)
         self.assertEqual(r.path, target_path)
         target_repo = Repo(target_path)
+        self.addCleanup(target_repo.close)
         self.assertEqual(0, len(target_repo.open_index()))
         self.assertEqual(c3.id, target_repo.refs[b"refs/tags/foo"])
         self.assertNotIn(b"f1", os.listdir(target_path))
@@ -1055,6 +1056,7 @@ class CloneTests(PorcelainTestCase):
         self.addCleanup(r.close)
         self.assertEqual(r.path, target_path)
         target_repo = Repo(target_path)
+        self.addCleanup(target_repo.close)
         self.assertEqual(0, len(target_repo.open_index()))
         self.assertEqual(c1.id, target_repo.refs[b"refs/heads/else"])
         self.assertEqual(c1.id, target_repo.refs[b"HEAD"])
@@ -4187,6 +4189,7 @@ class CheckoutTests(PorcelainTestCase):
         target_path = tempfile.mkdtemp()
         self.addCleanup(shutil.rmtree, target_path)
         target_repo = porcelain.clone(remote_path, target_path)
+        self.addCleanup(target_repo.close)
 
         # Create a remote tracking branch reference
         remote_branch_ref = b"refs/remotes/origin/feature"
@@ -5385,7 +5388,7 @@ class StatusTests(PorcelainTestCase):
         self.assertDictEqual(
             {"add": [b"crlf-new"], "delete": [], "modify": []}, results.staged
         )
-        self.assertListEqual(results.unstaged, [])
+        self.assertListEqual(results.unstaged, [b"crlf-exists"])
         self.assertListEqual(results.untracked, [])
 
     def test_get_tree_changes_add(self) -> None:

+ 450 - 0
tests/test_porcelain_filters.py

@@ -0,0 +1,450 @@
+# test_porcelain_filters.py -- Tests for porcelain filter integration
+# Copyright (C) 2024 Jelmer Vernooij <jelmer@jelmer.uk>
+#
+# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
+# General Public License as public by the Free Software Foundation; version 2.0
+# or (at your option) any later version. You can redistribute it and/or
+# modify it under the terms of either of these two licenses.
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# You should have received a copy of the licenses; if not, see
+# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
+# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
+# License, Version 2.0.
+#
+
+"""Tests for porcelain filter integration."""
+
+import os
+import tempfile
+from io import BytesIO
+
+from dulwich import porcelain
+from dulwich.repo import Repo
+
+from . import TestCase
+from .compat.utils import rmtree_ro
+
+
+class PorcelainFilterTests(TestCase):
+    """Test filter integration in porcelain commands."""
+
+    def setUp(self) -> None:
+        super().setUp()
+        self.test_dir = tempfile.mkdtemp()
+        self.addCleanup(rmtree_ro, self.test_dir)
+        self.repo = Repo.init(self.test_dir)
+        self.addCleanup(self.repo.close)
+
+    def test_add_with_autocrlf(self) -> None:
+        """Test adding files with autocrlf enabled."""
+        # Configure autocrlf
+        config = self.repo.get_config()
+        config.set((b"core",), b"autocrlf", b"true")
+        config.write_to_path()
+
+        # Create a file with CRLF line endings
+        test_file = os.path.join(self.test_dir, "test.txt")
+        with open(test_file, "wb") as f:
+            f.write(b"line1\r\nline2\r\nline3\r\n")
+
+        # Add the file
+        porcelain.add(self.repo, paths=["test.txt"])
+
+        # Check that the blob in the index has LF line endings
+        index = self.repo.open_index()
+        entry = index[b"test.txt"]
+        blob = self.repo.object_store[entry.sha]
+        self.assertEqual(blob.data, b"line1\nline2\nline3\n")
+
+    def test_checkout_with_autocrlf(self) -> None:
+        """Test checkout with autocrlf enabled."""
+        # First, add a file with LF line endings to the repo
+        test_file = os.path.join(self.test_dir, "test.txt")
+        with open(test_file, "wb") as f:
+            f.write(b"line1\nline2\nline3\n")
+
+        porcelain.add(self.repo, paths=["test.txt"])
+        porcelain.commit(self.repo, message=b"Add test file")
+
+        # Remove the file
+        os.remove(test_file)
+
+        # Configure autocrlf
+        config = self.repo.get_config()
+        config.set((b"core",), b"autocrlf", b"true")
+        config.write_to_path()
+
+        # Checkout the file
+        porcelain.checkout(self.repo, paths=["test.txt"])
+
+        # On Windows or with autocrlf=true, file should have CRLF line endings
+        with open(test_file, "rb") as f:
+            content = f.read()
+            # The checkout should apply the smudge filter
+            self.assertEqual(content, b"line1\r\nline2\r\nline3\r\n")
+
+    def test_status_with_filters(self) -> None:
+        """Test status command with filters applied."""
+        # Configure autocrlf
+        config = self.repo.get_config()
+        config.set((b"core",), b"autocrlf", b"input")
+        config.write_to_path()
+
+        # Create a file with CRLF line endings
+        test_file = os.path.join(self.test_dir, "test.txt")
+        with open(test_file, "wb") as f:
+            f.write(b"line1\r\nline2\r\n")
+
+        # Add and commit with normalized line endings
+        porcelain.add(self.repo, paths=["test.txt"])
+        porcelain.commit(self.repo, message=b"Initial commit")
+
+        # Modify the file with CRLF line endings
+        with open(test_file, "wb") as f:
+            f.write(b"line1\r\nline2\r\nline3\r\n")
+
+        # Status should detect the change after normalizing
+        results = porcelain.status(self.repo)
+        self.assertIn(b"test.txt", results.unstaged)
+
+    def test_diff_with_filters(self) -> None:
+        """Test diff command with filters applied."""
+        # Configure autocrlf
+        config = self.repo.get_config()
+        config.set((b"core",), b"autocrlf", b"true")
+        config.write_to_path()
+
+        # Create and commit a file
+        test_file = os.path.join(self.test_dir, "test.txt")
+        with open(test_file, "wb") as f:
+            f.write(b"line1\r\nline2\r\n")
+
+        porcelain.add(self.repo, paths=["test.txt"])
+        porcelain.commit(self.repo, message=b"Initial commit")
+
+        # Modify the file
+        with open(test_file, "wb") as f:
+            f.write(b"line1\r\nmodified\r\nline3\r\n")
+
+        # Get diff - should normalize line endings for comparison
+        outstream = BytesIO()
+        porcelain.diff(self.repo, outstream=outstream)
+        diff_output = outstream.getvalue()
+        self.assertIn(b"-line2", diff_output)
+        self.assertIn(b"+modified", diff_output)
+        self.assertIn(b"+line3", diff_output)
+
+    def test_add_with_gitattributes(self) -> None:
+        """Test adding files with gitattributes filters."""
+        # Create .gitattributes with text attribute
+        gitattributes_path = os.path.join(self.test_dir, ".gitattributes")
+        with open(gitattributes_path, "wb") as f:
+            f.write(b"*.txt text\n")
+            f.write(b"*.bin -text\n")
+
+        # Add .gitattributes
+        porcelain.add(self.repo, paths=[".gitattributes"])
+
+        # Create text file with CRLF
+        text_file = os.path.join(self.test_dir, "test.txt")
+        with open(text_file, "wb") as f:
+            f.write(b"text\r\nfile\r\n")
+
+        # Create binary file with CRLF (should not be converted)
+        bin_file = os.path.join(self.test_dir, "test.bin")
+        with open(bin_file, "wb") as f:
+            f.write(b"binary\r\nfile\r\n")
+
+        # Add both files
+        porcelain.add(self.repo, paths=["test.txt", "test.bin"])
+
+        # Check text file was normalized
+        index = self.repo.open_index()
+        text_entry = index[b"test.txt"]
+        text_blob = self.repo.object_store[text_entry.sha]
+        self.assertEqual(text_blob.data, b"text\nfile\n")
+
+        # Check binary file was not normalized
+        bin_entry = index[b"test.bin"]
+        bin_blob = self.repo.object_store[bin_entry.sha]
+        self.assertEqual(bin_blob.data, b"binary\r\nfile\r\n")
+
+    def test_clone_with_filters(self) -> None:
+        """Test cloning a repository with filters."""
+        # Create a source repository
+        source_dir = tempfile.mkdtemp()
+        self.addCleanup(rmtree_ro, source_dir)
+        source_repo = Repo.init(source_dir)
+        self.addCleanup(source_repo.close)
+
+        # Add a file with LF endings
+        test_file = os.path.join(source_dir, "test.txt")
+        with open(test_file, "wb") as f:
+            f.write(b"line1\nline2\n")
+
+        porcelain.add(source_repo, paths=["test.txt"])
+        porcelain.commit(source_repo, message=b"Initial commit")
+
+        # Clone the repository without checkout
+        target_dir = tempfile.mkdtemp()
+        self.addCleanup(rmtree_ro, target_dir)
+
+        # Clone without checkout first
+        target_repo = porcelain.clone(source_dir, target_dir, checkout=False)
+        self.addCleanup(target_repo.close)
+
+        # Configure autocrlf in target repo
+        target_config = target_repo.get_config()
+        target_config.set((b"core",), b"autocrlf", b"true")
+        target_config.write_to_path()
+
+        # Now checkout the files with autocrlf enabled
+        target_repo.reset_index()
+
+        # Check that the working tree file has CRLF endings
+        target_file = os.path.join(target_dir, "test.txt")
+        with open(target_file, "rb") as f:
+            content = f.read()
+            # The checkout should apply the smudge filter
+            self.assertIn(b"\r\n", content)
+
+    def test_commit_with_clean_filter(self) -> None:
+        """Test committing with a clean filter."""
+        # Set up a custom filter in git config
+        config = self.repo.get_config()
+        config.set((b"filter", b"testfilter"), b"clean", b"sed 's/SECRET/REDACTED/g'")
+        config.write_to_path()
+
+        # Create .gitattributes to use the filter
+        gitattributes_path = os.path.join(self.test_dir, ".gitattributes")
+        with open(gitattributes_path, "wb") as f:
+            f.write(b"*.secret filter=testfilter\n")
+
+        porcelain.add(self.repo, paths=[".gitattributes"])
+        porcelain.commit(self.repo, message=b"Add gitattributes")
+
+        # Create a file with sensitive content
+        secret_file = os.path.join(self.test_dir, "config.secret")
+        with open(secret_file, "wb") as f:
+            f.write(b"password=SECRET123\n")
+
+        # Add the file
+        porcelain.add(self.repo, paths=["config.secret"])
+
+        # The committed blob should have filtered content
+        # (Note: actual filter execution requires process filter support)
+
+    def test_ls_files_with_filters(self) -> None:
+        """Test ls-files respects filter settings."""
+        # Configure autocrlf
+        config = self.repo.get_config()
+        config.set((b"core",), b"autocrlf", b"true")
+        config.write_to_path()
+
+        # Create files with different line endings
+        file1 = os.path.join(self.test_dir, "unix.txt")
+        with open(file1, "wb") as f:
+            f.write(b"unix\nfile\n")
+
+        file2 = os.path.join(self.test_dir, "windows.txt")
+        with open(file2, "wb") as f:
+            f.write(b"windows\r\nfile\r\n")
+
+        # Add files
+        porcelain.add(self.repo, paths=["unix.txt", "windows.txt"])
+
+        # List files
+        files = list(porcelain.ls_files(self.repo))
+        self.assertIn(b"unix.txt", files)
+        self.assertIn(b"windows.txt", files)
+
+        # Both files should be normalized in the index
+        index = self.repo.open_index()
+        for filename in [b"unix.txt", b"windows.txt"]:
+            entry = index[filename]
+            blob = self.repo.object_store[entry.sha]
+            # Both should have LF line endings in the repository
+            self.assertNotIn(b"\r\n", blob.data)
+
+
+class PorcelainLFSIntegrationTests(TestCase):
+    """Test LFS integration in porcelain commands."""
+
+    def setUp(self) -> None:
+        super().setUp()
+        self.test_dir = tempfile.mkdtemp()
+        self.addCleanup(rmtree_ro, self.test_dir)
+        self.repo = Repo.init(self.test_dir)
+        self.addCleanup(self.repo.close)
+
+        # Set up LFS
+        lfs_dir = os.path.join(self.test_dir, ".git", "lfs")
+        os.makedirs(lfs_dir, exist_ok=True)
+
+    def test_add_large_file_with_lfs(self) -> None:
+        """Test adding large files with LFS filter."""
+        # Configure LFS filter
+        config = self.repo.get_config()
+        config.set((b"filter", b"lfs"), b"clean", b"git-lfs clean -- %f")
+        config.set((b"filter", b"lfs"), b"smudge", b"git-lfs smudge -- %f")
+        config.set((b"filter", b"lfs"), b"process", b"git-lfs filter-process")
+        config.set((b"filter", b"lfs"), b"required", b"true")
+        config.write_to_path()
+
+        # Create .gitattributes for LFS
+        gitattributes_path = os.path.join(self.test_dir, ".gitattributes")
+        with open(gitattributes_path, "wb") as f:
+            f.write(b"*.bin filter=lfs diff=lfs merge=lfs -text\n")
+
+        porcelain.add(self.repo, paths=[".gitattributes"])
+        porcelain.commit(self.repo, message=b"Add LFS attributes")
+
+        # Create a large binary file
+        large_file = os.path.join(self.test_dir, "large.bin")
+        content = b"X" * (1024 * 1024)  # 1MB file
+        with open(large_file, "wb") as f:
+            f.write(content)
+
+        # Add the large file
+        # Note: actual LFS handling requires git-lfs to be installed
+        # This test verifies the filter infrastructure is in place
+        porcelain.add(self.repo, paths=["large.bin"])
+
+        # Check that something was added to the index
+        index = self.repo.open_index()
+        self.assertIn(b"large.bin", index)
+
+    def test_status_with_lfs_files(self) -> None:
+        """Test status command with LFS files."""
+        # Set up LFS attributes
+        gitattributes_path = os.path.join(self.test_dir, ".gitattributes")
+        with open(gitattributes_path, "wb") as f:
+            f.write(b"*.bin filter=lfs diff=lfs merge=lfs -text\n")
+
+        porcelain.add(self.repo, paths=[".gitattributes"])
+        porcelain.commit(self.repo, message=b"Add LFS attributes")
+
+        # Create an LFS pointer file manually
+        from dulwich.lfs import LFSPointer
+
+        pointer = LFSPointer(
+            "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855", 1024
+        )
+        lfs_file = os.path.join(self.test_dir, "data.bin")
+        with open(lfs_file, "wb") as f:
+            f.write(pointer.to_bytes())
+
+        # Add and commit the pointer
+        porcelain.add(self.repo, paths=["data.bin"])
+        porcelain.commit(self.repo, message=b"Add LFS file")
+
+        # Modify the pointer file
+        with open(lfs_file, "ab") as f:
+            f.write(b"modified\n")
+
+        # Status should detect the change
+        results = porcelain.status(self.repo)
+        self.assertIn(b"data.bin", results.unstaged)
+
+
+class FilterEdgeCaseTests(TestCase):
+    """Test edge cases in filter handling."""
+
+    def setUp(self) -> None:
+        super().setUp()
+        self.test_dir = tempfile.mkdtemp()
+        self.addCleanup(rmtree_ro, self.test_dir)
+        self.repo = Repo.init(self.test_dir)
+        self.addCleanup(self.repo.close)
+
+    def test_mixed_line_endings(self) -> None:
+        """Test handling files with mixed line endings."""
+        config = self.repo.get_config()
+        config.set((b"core",), b"autocrlf", b"true")
+        config.write_to_path()
+
+        # Create file with mixed line endings
+        mixed_file = os.path.join(self.test_dir, "mixed.txt")
+        with open(mixed_file, "wb") as f:
+            f.write(b"line1\r\nline2\nline3\r\nline4")
+
+        porcelain.add(self.repo, paths=["mixed.txt"])
+
+        # Check normalization
+        index = self.repo.open_index()
+        entry = index[b"mixed.txt"]
+        blob = self.repo.object_store[entry.sha]
+        # Should normalize all to LF
+        self.assertEqual(blob.data, b"line1\nline2\nline3\nline4")
+
+    def test_binary_detection(self) -> None:
+        """Test binary file detection in filters."""
+        config = self.repo.get_config()
+        config.set((b"core",), b"autocrlf", b"true")
+        config.write_to_path()
+
+        # Create a file with binary content
+        binary_file = os.path.join(self.test_dir, "binary.dat")
+        with open(binary_file, "wb") as f:
+            f.write(b"\x00\x01\x02\r\n\x03\x04\r\n")
+
+        porcelain.add(self.repo, paths=["binary.dat"])
+
+        # Binary files should not be converted
+        index = self.repo.open_index()
+        entry = index[b"binary.dat"]
+        blob = self.repo.object_store[entry.sha]
+        self.assertEqual(blob.data, b"\x00\x01\x02\r\n\x03\x04\r\n")
+
+    def test_empty_file_handling(self) -> None:
+        """Test filter handling of empty files."""
+        config = self.repo.get_config()
+        config.set((b"core",), b"autocrlf", b"true")
+        config.write_to_path()
+
+        # Create empty file
+        empty_file = os.path.join(self.test_dir, "empty.txt")
+        with open(empty_file, "wb") as f:
+            f.write(b"")
+
+        porcelain.add(self.repo, paths=["empty.txt"])
+
+        # Empty files should pass through unchanged
+        index = self.repo.open_index()
+        entry = index[b"empty.txt"]
+        blob = self.repo.object_store[entry.sha]
+        self.assertEqual(blob.data, b"")
+
+    def test_gitattributes_precedence(self) -> None:
+        """Test that gitattributes takes precedence over config."""
+        # Set autocrlf=false in config
+        config = self.repo.get_config()
+        config.set((b"core",), b"autocrlf", b"false")
+        config.write_to_path()
+
+        # But force text conversion via gitattributes
+        gitattributes_path = os.path.join(self.test_dir, ".gitattributes")
+        with open(gitattributes_path, "wb") as f:
+            f.write(b"*.txt text\n")
+
+        porcelain.add(self.repo, paths=[".gitattributes"])
+
+        # Create file with CRLF
+        text_file = os.path.join(self.test_dir, "test.txt")
+        with open(text_file, "wb") as f:
+            f.write(b"line1\r\nline2\r\n")
+
+        porcelain.add(self.repo, paths=["test.txt"])
+
+        # Should be normalized despite autocrlf=false
+        index = self.repo.open_index()
+        entry = index[b"test.txt"]
+        blob = self.repo.object_store[entry.sha]
+        self.assertEqual(blob.data, b"line1\nline2\n")
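The porcelain-level flow these filter tests cover, sketched end to end (the temporary directory mirrors the test setup; with core.autocrlf=true the clean filter stores LF-normalized blobs when staging):

    import os
    import tempfile

    from dulwich import porcelain
    from dulwich.repo import Repo

    path = tempfile.mkdtemp()
    repo = Repo.init(path)
    config = repo.get_config()
    config.set((b"core",), b"autocrlf", b"true")
    config.write_to_path()

    with open(os.path.join(path, "notes.txt"), "wb") as fh:
        fh.write(b"one\r\ntwo\r\n")

    porcelain.add(repo, paths=["notes.txt"])    # blob is stored with LF endings
    porcelain.commit(repo, message=b"Add notes")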

+ 244 - 0
tests/test_porcelain_lfs.py

@@ -0,0 +1,244 @@
+# test_porcelain_lfs.py -- Tests for LFS porcelain functions
+# Copyright (C) 2024 Jelmer Vernooij
+#
+# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
+# General Public License as public by the Free Software Foundation; version 2.0
+# or (at your option) any later version. You can redistribute it and/or
+# modify it under the terms of either of these two licenses.
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# You should have received a copy of the licenses; if not, see
+# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
+# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
+# License, Version 2.0.
+#
+
+"""Tests for LFS porcelain functions."""
+
+import os
+import tempfile
+import unittest
+
+from dulwich import porcelain
+from dulwich.lfs import LFSPointer, LFSStore
+from dulwich.repo import Repo
+from tests import TestCase
+
+
+class LFSPorcelainTestCase(TestCase):
+    """Test case for LFS porcelain functions."""
+
+    def setUp(self):
+        super().setUp()
+        self.test_dir = tempfile.mkdtemp()
+        self.addCleanup(self._cleanup_test_dir)
+        self.repo = Repo.init(self.test_dir)
+        self.addCleanup(self.repo.close)
+
+    def _cleanup_test_dir(self):
+        """Clean up test directory recursively."""
+        import shutil
+
+        shutil.rmtree(self.test_dir, ignore_errors=True)
+
+    def test_lfs_init(self):
+        """Test LFS initialization."""
+        porcelain.lfs_init(self.repo)
+
+        # Check that LFS store was created
+        lfs_dir = os.path.join(self.repo.controldir(), "lfs")
+        self.assertTrue(os.path.exists(lfs_dir))
+        self.assertTrue(os.path.exists(os.path.join(lfs_dir, "objects")))
+        self.assertTrue(os.path.exists(os.path.join(lfs_dir, "tmp")))
+
+        # Check that config was set
+        config = self.repo.get_config()
+        self.assertEqual(
+            config.get((b"filter", b"lfs"), b"process"), b"git-lfs filter-process"
+        )
+        self.assertEqual(config.get((b"filter", b"lfs"), b"required"), b"true")
+
+    def test_lfs_track(self):
+        """Test tracking patterns with LFS."""
+        # Track some patterns
+        patterns = ["*.bin", "*.pdf"]
+        tracked = porcelain.lfs_track(self.repo, patterns)
+
+        self.assertEqual(set(tracked), set(patterns))
+
+        # Check .gitattributes was created
+        gitattributes_path = os.path.join(self.repo.path, ".gitattributes")
+        self.assertTrue(os.path.exists(gitattributes_path))
+
+        # Read and verify content
+        with open(gitattributes_path, "rb") as f:
+            content = f.read()
+
+        self.assertIn(b"*.bin diff=lfs filter=lfs merge=lfs -text", content)
+        self.assertIn(b"*.pdf diff=lfs filter=lfs merge=lfs -text", content)
+
+        # Test listing tracked patterns
+        tracked = porcelain.lfs_track(self.repo)
+        self.assertEqual(set(tracked), set(patterns))
+
+    def test_lfs_untrack(self):
+        """Test untracking patterns from LFS."""
+        # First track some patterns
+        patterns = ["*.bin", "*.pdf", "*.zip"]
+        porcelain.lfs_track(self.repo, patterns)
+
+        # Untrack one pattern
+        remaining = porcelain.lfs_untrack(self.repo, ["*.pdf"])
+        self.assertEqual(set(remaining), {"*.bin", "*.zip"})
+
+        # Verify .gitattributes
+        with open(os.path.join(self.repo.path, ".gitattributes"), "rb") as f:
+            content = f.read()
+
+        self.assertIn(b"*.bin diff=lfs filter=lfs merge=lfs -text", content)
+        self.assertNotIn(b"*.pdf diff=lfs filter=lfs merge=lfs -text", content)
+        self.assertIn(b"*.zip diff=lfs filter=lfs merge=lfs -text", content)
+
+    def test_lfs_clean(self):
+        """Test cleaning a file to LFS pointer."""
+        # Initialize LFS
+        porcelain.lfs_init(self.repo)
+
+        # Create a test file
+        test_content = b"This is test content for LFS"
+        test_file = os.path.join(self.repo.path, "test.bin")
+        with open(test_file, "wb") as f:
+            f.write(test_content)
+
+        # Clean the file
+        pointer_content = porcelain.lfs_clean(self.repo, "test.bin")
+
+        # Verify it's a valid LFS pointer
+        pointer = LFSPointer.from_bytes(pointer_content)
+        self.assertIsNotNone(pointer)
+        self.assertEqual(pointer.size, len(test_content))
+
+        # Verify the content was stored in LFS
+        lfs_store = LFSStore.from_repo(self.repo)
+        with lfs_store.open_object(pointer.oid) as f:
+            stored_content = f.read()
+        self.assertEqual(stored_content, test_content)
+
+    def test_lfs_smudge(self):
+        """Test smudging an LFS pointer to content."""
+        # Initialize LFS
+        porcelain.lfs_init(self.repo)
+
+        # Create test content and store it
+        test_content = b"This is test content for smudging"
+        lfs_store = LFSStore.from_repo(self.repo)
+        oid = lfs_store.write_object([test_content])
+
+        # Create LFS pointer
+        pointer = LFSPointer(oid, len(test_content))
+        pointer_content = pointer.to_bytes()
+
+        # Smudge the pointer
+        smudged_content = porcelain.lfs_smudge(self.repo, pointer_content)
+
+        self.assertEqual(smudged_content, test_content)
+
+    def test_lfs_ls_files(self):
+        """Test listing LFS files."""
+        # Initialize repo with some LFS files
+        porcelain.lfs_init(self.repo)
+
+        # Create a test file and convert to LFS
+        test_content = b"Large file content"
+        test_file = os.path.join(self.repo.path, "large.bin")
+        with open(test_file, "wb") as f:
+            f.write(test_content)
+
+        # Clean to LFS pointer
+        pointer_content = porcelain.lfs_clean(self.repo, "large.bin")
+        with open(test_file, "wb") as f:
+            f.write(pointer_content)
+
+        # Add and commit
+        porcelain.add(self.repo, paths=["large.bin"])
+        porcelain.commit(self.repo, message=b"Add LFS file")
+
+        # List LFS files
+        lfs_files = porcelain.lfs_ls_files(self.repo)
+
+        self.assertEqual(len(lfs_files), 1)
+        path, oid, size = lfs_files[0]
+        self.assertEqual(path, "large.bin")
+        self.assertEqual(size, len(test_content))
+
+    def test_lfs_migrate(self):
+        """Test migrating files to LFS."""
+        # Create some files
+        files = {
+            "small.txt": b"Small file",
+            "large1.bin": b"X" * 1000,
+            "large2.dat": b"Y" * 2000,
+            "exclude.bin": b"Z" * 1500,
+        }
+
+        for filename, content in files.items():
+            path = os.path.join(self.repo.path, filename)
+            with open(path, "wb") as f:
+                f.write(content)
+
+        # Add files to index
+        porcelain.add(self.repo, paths=list(files.keys()))
+
+        # Migrate with patterns
+        count = porcelain.lfs_migrate(
+            self.repo, include=["*.bin", "*.dat"], exclude=["exclude.*"]
+        )
+
+        self.assertEqual(count, 2)  # large1.bin and large2.dat
+
+        # Verify files were converted to LFS pointers
+        for filename in ["large1.bin", "large2.dat"]:
+            path = os.path.join(self.repo.path, filename)
+            with open(path, "rb") as f:
+                content = f.read()
+            pointer = LFSPointer.from_bytes(content)
+            self.assertIsNotNone(pointer)
+
+    def test_lfs_pointer_check(self):
+        """Test checking if files are LFS pointers."""
+        # Initialize LFS
+        porcelain.lfs_init(self.repo)
+
+        # Create an LFS pointer file
+        test_content = b"LFS content"
+        lfs_file = os.path.join(self.repo.path, "lfs.bin")
+        # First create the file
+        with open(lfs_file, "wb") as f:
+            f.write(test_content)
+        pointer_content = porcelain.lfs_clean(self.repo, "lfs.bin")
+        with open(lfs_file, "wb") as f:
+            f.write(pointer_content)
+
+        # Create a regular file
+        regular_file = os.path.join(self.repo.path, "regular.txt")
+        with open(regular_file, "wb") as f:
+            f.write(b"Regular content")
+
+        # Check both files
+        results = porcelain.lfs_pointer_check(
+            self.repo, paths=["lfs.bin", "regular.txt", "nonexistent.txt"]
+        )
+
+        self.assertIsNotNone(results["lfs.bin"])
+        self.assertIsNone(results["regular.txt"])
+        self.assertIsNone(results["nonexistent.txt"])
+
+
+if __name__ == "__main__":
+    unittest.main()
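Taken together, the porcelain entry points exercised above give a git-lfs-style workflow without the external git-lfs binary. A sketch under the same assumptions as these tests (local LFS store only, no remote transfer):

    import os
    import tempfile

    from dulwich import porcelain
    from dulwich.repo import Repo

    repo = Repo.init(tempfile.mkdtemp())
    porcelain.lfs_init(repo)              # creates .git/lfs and the filter.lfs config
    porcelain.lfs_track(repo, ["*.bin"])  # records the pattern in .gitattributes

    # Store the file's content in the LFS store and replace it with a pointer.
    path = os.path.join(repo.path, "large.bin")
    with open(path, "wb") as fh:
        fh.write(b"X" * 1024)
    pointer = porcelain.lfs_clean(repo, "large.bin")
    with open(path, "wb") as fh:
        fh.write(pointer)

    porcelain.add(repo, paths=[".gitattributes", "large.bin"])
    porcelain.commit(repo, message=b"Track large.bin with LFS")

    # (path, oid, size) tuples for committed pointers; lfs_smudge resolves a
    # pointer back to the stored content.
    print(porcelain.lfs_ls_files(repo))
    content = porcelain.lfs_smudge(repo, pointer)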

+ 4 - 0
tests/test_porcelain_merge.py

@@ -283,6 +283,7 @@ class PorcelainMergeTreeTests(TestCase):
             # Initialize repo
             porcelain.init(tmpdir)
             repo = Repo(tmpdir)
+            self.addCleanup(repo.close)
 
             # Create base tree
             with open(os.path.join(tmpdir, "file1.txt"), "w") as f:
@@ -329,6 +330,7 @@ class PorcelainMergeTreeTests(TestCase):
             # Initialize repo
             porcelain.init(tmpdir)
             repo = Repo(tmpdir)
+            self.addCleanup(repo.close)
 
             # Create base tree
             with open(os.path.join(tmpdir, "file1.txt"), "w") as f:
@@ -381,6 +383,7 @@ class PorcelainMergeTreeTests(TestCase):
             # Initialize repo
             porcelain.init(tmpdir)
             repo = Repo(tmpdir)
+            self.addCleanup(repo.close)
 
             # Create our tree
             with open(os.path.join(tmpdir, "file1.txt"), "w") as f:
@@ -414,6 +417,7 @@ class PorcelainMergeTreeTests(TestCase):
             # Initialize repo
             porcelain.init(tmpdir)
             repo = Repo(tmpdir)
+            self.addCleanup(repo.close)
 
             # Create base tree
             with open(os.path.join(tmpdir, "file1.txt"), "w") as f:

+ 1 - 0
tests/test_repository.py

@@ -1302,6 +1302,7 @@ class BuildRepoRootTests(TestCase):
         c.set(("core",), "looseCompression", "4")
         c.set(("core",), "looseCompression", "4")
         c.write_to_path()
         c.write_to_path()
         r = Repo(self._repo_dir)
         r = Repo(self._repo_dir)
+        self.addCleanup(r.close)
         self.assertEqual(r.object_store.loose_compression_level, 4)
         self.assertEqual(r.object_store.loose_compression_level, 4)
 
 
     def test_repositoryformatversion_unsupported(self) -> None:
     def test_repositoryformatversion_unsupported(self) -> None:

+ 5 - 2
tests/test_sparse_patterns.py

@@ -553,8 +553,11 @@ class ApplyIncludedPathsTests(TestCase):
         filter_registry = FilterRegistry()
         filter_registry.register_driver("uppercase", UppercaseFilter())
 
-        # Create gitattributes dict
-        gitattributes = {b"*.txt": {b"filter": b"uppercase"}}
+        # Create gitattributes object
+        from dulwich.attrs import GitAttributes, Pattern
+
+        patterns = [(Pattern(b"*.txt"), {b"filter": b"uppercase"})]
+        gitattributes = GitAttributes(patterns)
 
         # Monkey patch the repo to use our filter registry
         original_get_blob_normalizer = self.repo.get_blob_normalizer