Parcourir la source

Fixed #8149 -- Made File.__iter__() support universal newlines.

The following are recognized as ending a line: the Unix end-of-line
convention '\n', the Windows convention '\r\n', and the old
Macintosh convention '\r'.

http://www.python.org/dev/peps/pep-0278

Thanks tchaumeny for review.
Jon Dufresne il y a 10 ans
Parent
commit
eb4f6de980
5 fichiers modifiés avec 108 ajouts et 12 suppressions
  1. +34 -7
      django/core/files/base.py
  2. +9 -0
      docs/ref/files/file.txt
  3. +9 -4
      docs/ref/files/uploads.txt
  4. +7 -0
      docs/releases/1.8.txt
  5. +49 -1
      tests/files/tests.py

+ 34 - 7
django/core/files/base.py

@@ -102,16 +102,22 @@ class File(FileProxyMixin):
         # Iterate over this file-like object by newlines
         buffer_ = None
         for chunk in self.chunks():
-            chunk_buffer = BytesIO(chunk)
-
-            for line in chunk_buffer:
+            for line in chunk.splitlines(True):
                 if buffer_:
-                    line = buffer_ + line
+                    if endswith_cr(buffer_) and not equals_lf(line):
+                        # Line split after a \r newline; yield buffer_.
+                        yield buffer_
+                        # Continue with line.
+                    else:
+                        # Line either split without a newline (line
+                        # continues after buffer_) or with \r\n
+                        # newline (line == b'\n').
+                        line = buffer_ + line
+                    # buffer_ handled, clear it.
                     buffer_ = None
 
-                # If this is the end of a line, yield
-                # otherwise, wait for the next round
-                if line[-1:] in (b'\n', b'\r'):
+                # If this is the end of a \n or \r\n line, yield.
+                if endswith_lf(line):
                     yield line
                 else:
                     buffer_ = line
@@ -165,3 +171,24 @@ class ContentFile(File):
 
     def close(self):
         pass
+
+
+def endswith_cr(line):
+    """
+    Return True if line (a text or byte string) ends with '\r'.
+    """
+    return line.endswith('\r' if isinstance(line, six.text_type) else b'\r')
+
+
+def endswith_lf(line):
+    """
+    Return True if line (a text or byte string) ends with '\n'.
+    """
+    return line.endswith('\n' if isinstance(line, six.text_type) else b'\n')
+
+
+def equals_lf(line):
+    """
+    Return True if line (a text or byte string) equals '\n'.
+    """
+    return line == ('\n' if isinstance(line, six.text_type) else b'\n')

+ 9 - 0
docs/ref/files/file.txt

@@ -53,6 +53,15 @@ The ``File`` Class
 
         Iterate over the file yielding one line at a time.
 
+        .. versionchanged:: 1.8
+
+            ``File`` now uses `universal newlines`_. The following are
+            recognized as ending a line: the Unix end-of-line convention
+            ``'\n'``, the Windows convention ``'\r\n'``, and the old Macintosh
+            convention ``'\r'``.
+
+            .. _universal newlines: http://www.python.org/dev/peps/pep-0278
+
     .. method:: chunks([chunk_size=None])
 
         Iterate over the file yielding "chunks" of a given size. ``chunk_size``

+ 9 - 4
docs/ref/files/uploads.txt

@@ -82,10 +82,15 @@ Here are some useful attributes of ``UploadedFile``:
         for line in uploadedfile:
             do_something_with(line)
 
-    However, *unlike* standard Python files, :class:`UploadedFile` only
-    understands ``\n`` (also known as "Unix-style") line endings. If you know
-    that you need to handle uploaded files with different line endings, you'll
-    need to do so in your view.
+    Lines are split using `universal newlines`_. The following are recognized
+    as ending a line: the Unix end-of-line convention ``'\n'``, the Windows
+    convention ``'\r\n'``, and the old Macintosh convention ``'\r'``.
+
+    .. _universal newlines: http://www.python.org/dev/peps/pep-0278
+
+    .. versionchanged:: 1.8
+
+        Previously lines were only split on the Unix end-of-line ``'\n'``.
 
 Subclasses of ``UploadedFile`` include:
 

+ 7 - 0
docs/releases/1.8.txt

@@ -659,6 +659,13 @@ Miscellaneous
 * By default, :ref:`call_command <call-command>` now always skips the check
   framework (unless you pass it ``skip_checks=False``).
 
+* When iterating over lines, :class:`~django.core.files.File` now uses
+  `universal newlines`_. The following are recognized as ending a line: the
+  Unix end-of-line convention ``'\n'``, the Windows convention ``'\r\n'``, and
+  the old Macintosh convention ``'\r'``.
+
+  .. _universal newlines: http://www.python.org/dev/peps/pep-0278
+
 .. _deprecated-features-1.8:
 
 Features deprecated in 1.8

+ 49 - 1
tests/files/tests.py

@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 from __future__ import unicode_literals
 
-from io import BytesIO
+from io import BytesIO, StringIO
 import os
 import gzip
 import tempfile
@@ -72,6 +72,54 @@ class FileTests(unittest.TestCase):
         file = File(BytesIO(b'one\ntwo\nthree'))
         self.assertEqual(list(file), [b'one\n', b'two\n', b'three'])
 
+    def test_file_iteration_windows_newlines(self):
+        """
+        #8149 - File objects with \r\n line endings should yield lines
+        when iterated over.
+        """
+        f = File(BytesIO(b'one\r\ntwo\r\nthree'))
+        self.assertEqual(list(f), [b'one\r\n', b'two\r\n', b'three'])
+
+    def test_file_iteration_mac_newlines(self):
+        """
+        #8149 - File objects with \r line endings should yield lines
+        when iterated over.
+        """
+        f = File(BytesIO(b'one\rtwo\rthree'))
+        self.assertEqual(list(f), [b'one\r', b'two\r', b'three'])
+
+    def test_file_iteration_mixed_newlines(self):
+        f = File(BytesIO(b'one\rtwo\nthree\r\nfour'))
+        self.assertEqual(list(f), [b'one\r', b'two\n', b'three\r\n', b'four'])
+
+    def test_file_iteration_with_unix_newline_at_chunk_boundary(self):
+        f = File(BytesIO(b'one\ntwo\nthree'))
+        # Set chunk size to create a boundary after \n:
+        # b'one\n...
+        #        ^
+        f.DEFAULT_CHUNK_SIZE = 4
+        self.assertEqual(list(f), [b'one\n', b'two\n', b'three'])
+
+    def test_file_iteration_with_windows_newline_at_chunk_boundary(self):
+        f = File(BytesIO(b'one\r\ntwo\r\nthree'))
+        # Set chunk size to create a boundary between \r and \n:
+        # b'one\r\n...
+        #        ^
+        f.DEFAULT_CHUNK_SIZE = 4
+        self.assertEqual(list(f), [b'one\r\n', b'two\r\n', b'three'])
+
+    def test_file_iteration_with_mac_newline_at_chunk_boundary(self):
+        f = File(BytesIO(b'one\rtwo\rthree'))
+        # Set chunk size to create a boundary after \r:
+        # b'one\r...
+        #        ^
+        f.DEFAULT_CHUNK_SIZE = 4
+        self.assertEqual(list(f), [b'one\r', b'two\r', b'three'])
+
+    def test_file_iteration_with_text(self):
+        f = File(StringIO('one\ntwo\nthree'))
+        self.assertEqual(list(f), ['one\n', 'two\n', 'three'])
+
 
 class NoNameFileTestCase(unittest.TestCase):
     """