Browse Source

Fixed #28194 -- Added support for normalization and cover density to SearchRank.

Hannes Ljungberg 5 years ago
parent
commit
0b51a4f894

+ 10 - 1
django/contrib/postgres/search.py

@@ -208,7 +208,10 @@ class SearchRank(Func):
     function = 'ts_rank'
     output_field = FloatField()
 
-    def __init__(self, vector, query, weights=None):
+    def __init__(
+        self, vector, query, weights=None, normalization=None,
+        cover_density=False,
+    ):
         if not hasattr(vector, 'resolve_expression'):
             vector = SearchVector(vector)
         if not hasattr(query, 'resolve_expression'):
@@ -218,6 +221,12 @@ class SearchRank(Func):
             if not hasattr(weights, 'resolve_expression'):
                 weights = Value(weights)
             expressions = (weights,) + expressions
+        if normalization is not None:
+            if not hasattr(normalization, 'resolve_expression'):
+                normalization = Value(normalization)
+            expressions += (normalization,)
+        if cover_density:
+            self.function = 'ts_rank_cd'
         super().__init__(*expressions)
 
 

+ 27 - 1
docs/ref/contrib/postgres/search.txt

@@ -118,7 +118,7 @@ See :ref:`postgresql-fts-search-configuration` for an explanation of the
 ``SearchRank``
 ==============
 
-.. class:: SearchRank(vector, query, weights=None)
+.. class:: SearchRank(vector, query, weights=None, normalization=None, cover_density=False)
 
 So far, we've returned the results for which any match between the vector and
 the query is possible. It's likely you may wish to order the results by some
@@ -137,6 +137,32 @@ order by relevancy::
 See :ref:`postgresql-fts-weighting-queries` for an explanation of the
 ``weights`` parameter.
 
+Set the ``cover_density`` parameter to ``True`` to enable the cover density
+ranking, which means that the proximity of matching query terms is taken into
+account.
+
+Provide an integer to the ``normalization`` parameter to control rank
+normalization. This integer is a bit mask, so you can combine multiple
+behaviors::
+
+    >>> from django.db.models import Value
+    >>> Entry.objects.annotate(
+    ...     rank=SearchRank(
+    ...         vector,
+    ...         query,
+    ...         normalization=Value(2).bitor(Value(4)),
+    ...     )
+    ... )
+
+The PostgreSQL documentation has more details about `different rank
+normalization options`_.
+
+.. _different rank normalization options: https://www.postgresql.org/docs/current/textsearch-controls.html#TEXTSEARCH-RANKING
+
+.. versionadded:: 3.1
+
+    The ``normalization`` and ``cover_density`` parameters were added.
+
 ``SearchHeadline``
 ==================
 

+ 8 - 0
docs/releases/3.1.txt

@@ -160,6 +160,14 @@ Minor features
 
 * :lookup:`search` lookup now supports query expressions.
 
+* The new ``cover_density`` parameter of
+  :class:`~django.contrib.postgres.search.SearchRank` allows ranking by cover
+  density.
+
+* The new ``normalization`` parameter of
+  :class:`~django.contrib.postgres.search.SearchRank` allows rank
+  normalization.
+
 :mod:`django.contrib.redirects`
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 

+ 61 - 1
tests/postgres_tests/test_search.py

@@ -6,7 +6,7 @@ All text copyright Python (Monty) Pictures. Thanks to sacred-texts.com for the
 transcript.
 """
 from django.db import connection
-from django.db.models import F
+from django.db.models import F, Value
 from django.test import modify_settings, skipUnlessDBFeature
 
 from . import PostgreSQLSimpleTestCase, PostgreSQLTestCase
@@ -449,6 +449,66 @@ class TestRankingAndWeights(GrailTestData, PostgreSQLTestCase):
         ).filter(rank__gt=0.3)
         self.assertSequenceEqual(searched, [self.verse0])
 
+    def test_cover_density_ranking(self):
+        not_dense_verse = Line.objects.create(
+            scene=self.robin,
+            character=self.minstrel,
+            dialogue=(
+                'Bravely taking to his feet, he beat a very brave retreat. '
+                'A brave retreat brave Sir Robin.'
+            )
+        )
+        searched = Line.objects.filter(character=self.minstrel).annotate(
+            rank=SearchRank(
+                SearchVector('dialogue'),
+                SearchQuery('brave robin'),
+                cover_density=True,  # Switches the function to ts_rank_cd.
+            ),
+        ).order_by('rank', '-pk')
+        self.assertSequenceEqual(
+            searched,
+            [self.verse2, not_dense_verse, self.verse1, self.verse0],
+        )
+
+    def test_ranking_with_normalization(self):
+        short_verse = Line.objects.create(
+            scene=self.robin,
+            character=self.minstrel,
+            dialogue='A brave retreat brave Sir Robin.',
+        )
+        searched = Line.objects.filter(character=self.minstrel).annotate(
+            rank=SearchRank(
+                SearchVector('dialogue'),
+                SearchQuery('brave sir robin'),
+                # Normalization 2 divides the rank by the document length.
+                normalization=2,
+            ),
+        ).order_by('rank')
+        self.assertSequenceEqual(
+            searched,
+            [self.verse2, self.verse1, self.verse0, short_verse],
+        )
+
+    def test_ranking_with_masked_normalization(self):
+        short_verse = Line.objects.create(
+            scene=self.robin,
+            character=self.minstrel,
+            dialogue='A brave retreat brave Sir Robin.',
+        )
+        searched = Line.objects.filter(character=self.minstrel).annotate(
+            rank=SearchRank(
+                SearchVector('dialogue'),
+                SearchQuery('brave sir robin'),
+                # Bit mask 2 | 8: divide the rank by the document length and
+                # by the number of unique words in the document.
+                normalization=Value(2).bitor(Value(8)),
+            ),
+        ).order_by('rank')
+        self.assertSequenceEqual(
+            searched,
+            [self.verse2, self.verse1, self.verse0, short_verse],
+        )
+
 
 class SearchVectorIndexTests(PostgreSQLTestCase):
     def test_search_vector_index(self):