7 년 전 · edee5a8de6
--- a/django/db/models/query.py
+++ b/django/db/models/query.py
@@ -20,7 +20,7 @@ from django.db.models.expressions import F
 
				 from django.db.models.fields import AutoField
			
 
				 from django.db.models.functions import Trunc
			
 
				 from django.db.models.query_utils import InvalidQuery, Q
			
 
				-from django.db.models.sql.constants import CURSOR
			
 
				+from django.db.models.sql.constants import CURSOR, GET_ITERATOR_CHUNK_SIZE
			
 
				 from django.utils import timezone
			
 
				 from django.utils.functional import cached_property, partition
			
 
				 from django.utils.version import get_version
			
@@ -33,9 +33,10 @@ EmptyResultSet = sql.EmptyResultSet
 
				 
			
 
				 
			
 
				 class BaseIterable:
			
 
				-    def __init__(self, queryset, chunked_fetch=False):
			
 
				+    def __init__(self, queryset, chunked_fetch=False, chunk_size=GET_ITERATOR_CHUNK_SIZE):
			
 
				         self.queryset = queryset
			
 
				         self.chunked_fetch = chunked_fetch
			
 
				+        self.chunk_size = chunk_size
			
 
				 
			
 
				 
			
 
				 class ModelIterable(BaseIterable):
			
@@ -47,7 +48,7 @@ class ModelIterable(BaseIterable):
 
				         compiler = queryset.query.get_compiler(using=db)
			
 
				         # Execute the query. This will also fill compiler.select, klass_info,
			
 
				         # and annotations.
			
 
				-        results = compiler.execute_sql(chunked_fetch=self.chunked_fetch)
			
 
				+        results = compiler.execute_sql(chunked_fetch=self.chunked_fetch, chunk_size=self.chunk_size)
			
 
				         select, klass_info, annotation_col_map = (compiler.select, compiler.klass_info,
			
 
				                                                   compiler.annotation_col_map)
			
 
				         model_cls = klass_info['model']
			
@@ -301,13 +302,15 @@ class QuerySet:
 
				     # METHODS THAT DO DATABASE QUERIES #
			
 
				     ####################################
			
 
				 
			
 
				-    def iterator(self):
			
 
				+    def iterator(self, chunk_size=2000):
			
 
				         """
			
 
				         An iterator over the results from applying this QuerySet to the
			
 
				         database.
			
 
				         """
			
 
				+        if chunk_size <= 0:
			
 
				+            raise ValueError('Chunk size must be strictly positive.')
			
 
				         use_chunked_fetch = not connections[self.db].settings_dict.get('DISABLE_SERVER_SIDE_CURSORS')
			
 
				-        return iter(self._iterable_class(self, chunked_fetch=use_chunked_fetch))
			
 
				+        return iter(self._iterable_class(self, chunked_fetch=use_chunked_fetch, chunk_size=chunk_size))
			
 
				 
			
 
				     def aggregate(self, *args, **kwargs):
			
 
				         """
			
--- a/django/db/models/sql/compiler.py
+++ b/django/db/models/sql/compiler.py
@@ -883,7 +883,7 @@ class SQLCompiler:
 
				         self.query.set_extra_mask(['a'])
			
 
				         return bool(self.execute_sql(SINGLE))
			
 
				 
			
 
				-    def execute_sql(self, result_type=MULTI, chunked_fetch=False):
			
 
				+    def execute_sql(self, result_type=MULTI, chunked_fetch=False, chunk_size=GET_ITERATOR_CHUNK_SIZE):
			
 
				         """
			
 
				         Run the query against the database and return the result(s). The
			
 
				         return value is a single data item if result_type is SINGLE, or an
			
@@ -937,7 +937,8 @@ class SQLCompiler:
 
				 
			
 
				         result = cursor_iter(
			
 
				             cursor, self.connection.features.empty_fetchmany_value,
			
 
				-            self.col_count
			
 
				+            self.col_count,
			
 
				+            chunk_size,
			
 
				         )
			
 
				         if not chunked_fetch and not self.connection.features.can_use_chunked_reads:
			
 
				             try:
			
@@ -1298,14 +1299,13 @@ class SQLAggregateCompiler(SQLCompiler):
 
				         return sql, params
			
 
				 
			
 
				 
			
 
				-def cursor_iter(cursor, sentinel, col_count):
			
 
				+def cursor_iter(cursor, sentinel, col_count, itersize):
			
 
				     """
			
 
				     Yield blocks of rows from a cursor and ensure the cursor is closed when
			
 
				     done.
			
 
				     """
			
 
				     try:
			
 
				-        for rows in iter((lambda: cursor.fetchmany(GET_ITERATOR_CHUNK_SIZE)),
			
 
				-                         sentinel):
			
 
				+        for rows in iter((lambda: cursor.fetchmany(itersize)), sentinel):
			
 
				             yield [r[0:col_count] for r in rows]
			
 
				     finally:
			
 
				         cursor.close()
			
--- a/docs/ref/models/querysets.txt
+++ b/docs/ref/models/querysets.txt
@@ -2004,7 +2004,7 @@ If you pass ``in_bulk()`` an empty list, you'll get an empty dictionary.
 
				 ``iterator()``
			
 
				 ~~~~~~~~~~~~~~
			
 
				 
			
 
				-.. method:: iterator()
			
 
				+.. method:: iterator(chunk_size=2000)
			
 
				 
			
 
				 Evaluates the ``QuerySet`` (by performing the query) and returns an iterator
			
 
				 (see :pep:`234`) over the results. A ``QuerySet`` typically caches its results
			
@@ -2033,6 +2033,11 @@ set into memory.
 
				 
			
 
				 The Oracle database driver always uses server-side cursors.
			
 
				 
			
 
				+With server-side cursors, the ``chunk_size`` parameter specifies the number of
			
 
				+results to cache at the database driver level. Fetching bigger chunks
			
 
				+diminishes the number of round trips between the database driver and the
			
 
				+database, at the expense of memory.
			
 
				+
			
 
				 On PostgreSQL, server-side cursors will only be used when the
			
 
				 :setting:`DISABLE_SERVER_SIDE_CURSORS <DATABASE-DISABLE_SERVER_SIDE_CURSORS>`
			
 
				 setting is ``False``. Read :ref:`transaction-pooling-server-side-cursors` if
			
@@ -2048,10 +2053,25 @@ drivers load the entire result set into memory. The result set is then
 
				 transformed into Python row objects by the database adapter using the
			
 
				 ``fetchmany()`` method defined in :pep:`249`.
			
 
				 
			
 
				+The ``chunk_size`` parameter controls the size of batches Django retrieves from
			
 
				+the database driver. Larger batches decrease the overhead of communicating with
			
 
				+the database driver at the expense of a slight increase in memory consumption.
			
 
				+
			
 
				+The default value of ``chunk_size``, 2000, comes from `a calculation on the
			
 
				+psycopg mailing list <https://www.postgresql.org/message-id/4D2F2C71.8080805%40dndg.it>`_:
			
 
				+
			
 
				+    Assuming rows of 10-20 columns with a mix of textual and numeric data, 2000
			
 
				+    is going to fetch less than 100KB of data, which seems a good compromise
			
 
				+    between the number of rows transferred and the data discarded if the loop
			
 
				+    is exited early.
			
 
				+
			
 
				 .. versionchanged:: 1.11
			
 
				 
			
 
				     PostgreSQL support for server-side cursors was added.
			
 
				 
			
 
				+.. versionchanged:: 2.0
			
 
				+
			
 
				+    The ``chunk_size`` parameter was added.
			
 
				 
			
 
				 ``latest()``
			
 
				 ~~~~~~~~~~~~
			
--- a/docs/releases/2.0.txt
+++ b/docs/releases/2.0.txt
@@ -214,6 +214,11 @@ Models
 
				 
			
 
				   .. _`identity columns`: https://docs.oracle.com/database/121/DRDAA/migr_tools_feat.htm#DRDAA109
			
 
				 
			
 
				+* The new ``chunk_size`` parameter of :meth:`.QuerySet.iterator` controls the
			
 
				+  number of rows fetched by the Python database client when streaming results
			
 
				+  from the database. For databases that don't support server-side cursors, it
			
 
				+  controls the number of results Django fetches from the database adapter.
			
 
				+
			
 
				 Requests and Responses
			
 
				 ~~~~~~~~~~~~~~~~~~~~~~
			
 
				 
			
@@ -280,6 +285,13 @@ Database backend API
 
				   attribute with the name of the database that your backend works with. Django
			
 
				   may use it in various messages, such as in system checks.
			
 
				 
			
 
				+* To improve performance when streaming large result sets from the database,
			
 
				+  :meth:`.QuerySet.iterator` now fetches 2000 rows at a time instead of 100.
			
 
				+  The old behavior can be restored using the ``chunk_size`` parameter. For
			
 
				+  example::
			
 
				+
			
 
				+      Book.objects.iterator(chunk_size=100)
			
 
				+
			
 
				 Dropped support for Oracle 11.2
			
 
				 -------------------------------
			
 
				 
			
--- a/tests/queries/test_iterator.py
+++ b/tests/queries/test_iterator.py
@@ -0,0 +1,39 @@
 
				+import datetime
			
 
				+from unittest import mock
			
 
				+
			
 
				+from django.db.models.sql.compiler import cursor_iter
			
 
				+from django.test import TestCase
			
 
				+
			
 
				+from .models import Article
			
 
				+
			
 
				+
			
 
				+class QuerySetIteratorTests(TestCase):
			
 
				+    itersize_index_in_mock_args = 3
			
 
				+
			
 
				+    @classmethod
			
 
				+    def setUpTestData(cls):
			
 
				+        Article.objects.create(name='Article 1', created=datetime.datetime.now())
			
 
				+        Article.objects.create(name='Article 2', created=datetime.datetime.now())
			
 
				+
			
 
				+    def test_iterator_invalid_chunk_size(self):
			
 
				+        for size in (0, -1):
			
 
				+            with self.subTest(size=size):
			
 
				+                with self.assertRaisesMessage(ValueError, 'Chunk size must be strictly positive.'):
			
 
				+                    Article.objects.iterator(chunk_size=size)
			
 
				+
			
 
				+    def test_default_iterator_chunk_size(self):
			
 
				+        qs = Article.objects.iterator()
			
 
				+        with mock.patch('django.db.models.sql.compiler.cursor_iter', side_effect=cursor_iter) as cursor_iter_mock:
			
 
				+            next(qs)
			
 
				+        self.assertEqual(cursor_iter_mock.call_count, 1)
			
 
				+        mock_args, _mock_kwargs = cursor_iter_mock.call_args
			
 
				+        self.assertEqual(mock_args[self.itersize_index_in_mock_args], 2000)
			
 
				+
			
 
				+    def test_iterator_chunk_size(self):
			
 
				+        batch_size = 3
			
 
				+        qs = Article.objects.iterator(chunk_size=batch_size)
			
 
				+        with mock.patch('django.db.models.sql.compiler.cursor_iter', side_effect=cursor_iter) as cursor_iter_mock:
			
 
				+            next(qs)
			
 
				+        self.assertEqual(cursor_iter_mock.call_count, 1)
			
 
				+        mock_args, _mock_kwargs = cursor_iter_mock.call_args
			
 
				+        self.assertEqual(mock_args[self.itersize_index_in_mock_args], batch_size)