瀏覽代碼

Fixed #31147 -- Added SearchHeadline to django.contrib.postgres.

Hannes Ljungberg 5 年之前
父節點
當前提交
65ab4f9f03

+ 53 - 0
django/contrib/postgres/search.py

@@ -1,3 +1,5 @@
+import psycopg2
+
 from django.db.models import (
     CharField, Expression, Field, FloatField, Func, Lookup, TextField, Value,
 )
@@ -230,6 +232,57 @@ class SearchRank(Func):
         super().__init__(*expressions)
 
 
+class SearchHeadline(Func):
+    function = 'ts_headline'
+    template = '%(function)s(%(expressions)s%(options)s)'
+    output_field = TextField()
+
+    def __init__(
+        self, expression, query, *, config=None, start_sel=None, stop_sel=None,
+        max_words=None, min_words=None, short_word=None, highlight_all=None,
+        max_fragments=None, fragment_delimiter=None,
+    ):
+        if not hasattr(query, 'resolve_expression'):
+            query = SearchQuery(query)
+        options = {
+            'StartSel': start_sel,
+            'StopSel': stop_sel,
+            'MaxWords': max_words,
+            'MinWords': min_words,
+            'ShortWord': short_word,
+            'HighlightAll': highlight_all,
+            'MaxFragments': max_fragments,
+            'FragmentDelimiter': fragment_delimiter,
+        }
+        self.options = {
+            option: value
+            for option, value in options.items() if value is not None
+        }
+        expressions = (expression, query)
+        if config is not None:
+            config = SearchConfig.from_parameter(config)
+            expressions = (config,) + expressions
+        super().__init__(*expressions)
+
+    def as_sql(self, compiler, connection, function=None, template=None):
+        options_sql = ''
+        options_params = []
+        if self.options:
+            # getquoted() returns a quoted bytestring of the adapted value.
+            options_params.append(', '.join(
+                '%s=%s' % (
+                    option,
+                    psycopg2.extensions.adapt(value).getquoted().decode(),
+                ) for option, value in self.options.items()
+            ))
+            options_sql = ', %s'
+        sql, params = super().as_sql(
+            compiler, connection, function=function, template=template,
+            options=options_sql,
+        )
+        return sql, params + options_params
+
+
 SearchVectorField.register_lookup(SearchVectorExact)
 
 

+ 54 - 0
docs/ref/contrib/postgres/search.txt

@@ -132,6 +132,60 @@ order by relevancy::
 See :ref:`postgresql-fts-weighting-queries` for an explanation of the
 ``weights`` parameter.
 
+``SearchHeadline``
+==================
+
+.. versionadded:: 3.1
+
+.. class:: SearchHeadline(expression, query, config=None, start_sel=None, stop_sel=None, max_words=None, min_words=None, short_word=None, highlight_all=None, max_fragments=None, fragment_delimiter=None)
+
+Accepts a single text field or an expression, a query, a config, and a set of
+options. Returns highlighted search results.
+
+Set the ``start_sel`` and ``stop_sel`` parameters to the string values to be
+used to wrap highlighted query terms in the document. PostgreSQL's defaults are
+``<b>`` and ``</b>``.
+
+Provide integer values to the ``max_words`` and ``min_words`` parameters to
+determine the longest and shortest headlines. PostgreSQL's defaults are 35 and
+15.
+
+Provide an integer value to the ``short_word`` parameter to discard words of
+this length or less in each headline. PostgreSQL's default is 3.
+
+Set the ``highlight_all`` parameter to ``True`` to use the whole document in
+place of a fragment and ignore the ``max_words``, ``min_words``, and
+``short_word`` parameters. This is disabled by default in PostgreSQL.
+
+Provide a non-zero integer value to the ``max_fragments`` parameter to set the
+maximum number of fragments to display. It's disabled by default in PostgreSQL.
+
+Set the ``fragment_delimiter`` string parameter to configure the delimiter
+between fragments. PostgreSQL's default is ``" ... "``.
+
+The PostgreSQL documentation has more details on `highlighting search
+results`_.
+
+Usage example::
+
+    >>> from django.contrib.postgres.search import SearchHeadline, SearchQuery
+    >>> query = SearchQuery('red tomato')
+    >>> entry = Entry.objects.annotate(
+    ...     headline=SearchHeadline(
+    ...         'body_text',
+    ...         query,
+    ...         start_sel='<span>',
+    ...         stop_sel='</span>',
+    ...     ),
+    ... ).get()
+    >>> print(entry.headline)
+    Sandwich with <span>tomato</span> and <span>red</span> cheese.
+
+See :ref:`postgresql-fts-search-configuration` for an explanation of the
+``config`` parameter.
+
+.. _highlighting search results: https://www.postgresql.org/docs/current/textsearch-controls.html#TEXTSEARCH-HEADLINE
+
 .. _postgresql-fts-search-configuration:
 
 Changing the search configuration

+ 3 - 0
docs/releases/3.1.txt

@@ -108,6 +108,9 @@ Minor features
 * :class:`~django.contrib.postgres.search.SearchQuery` now supports
   ``'websearch'`` search type on PostgreSQL 11+.
 
+* The new :class:`~django.contrib.postgres.search.SearchHeadline` class allows
+  highlighting search results.
+
 :mod:`django.contrib.redirects`
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 

+ 124 - 5
tests/postgres_tests/test_search.py

@@ -5,16 +5,20 @@ These tests use dialogue from the 1975 film Monty Python and the Holy Grail.
 All text copyright Python (Monty) Pictures. Thanks to sacred-texts.com for the
 transcript.
 """
-from django.contrib.postgres.search import (
-    SearchConfig, SearchQuery, SearchRank, SearchVector,
-)
 from django.db import connection
 from django.db.models import F
-from django.test import SimpleTestCase, modify_settings, skipUnlessDBFeature
+from django.test import modify_settings, skipUnlessDBFeature
 
 from . import PostgreSQLSimpleTestCase, PostgreSQLTestCase
 from .models import Character, Line, Scene
 
+try:
+    from django.contrib.postgres.search import (
+        SearchConfig, SearchHeadline, SearchQuery, SearchRank, SearchVector,
+    )
+except ImportError:
+    pass
+
 
 class GrailTestData:
 
@@ -436,7 +440,7 @@ class SearchVectorIndexTests(PostgreSQLTestCase):
             )
 
 
-class SearchQueryTests(SimpleTestCase):
+class SearchQueryTests(PostgreSQLSimpleTestCase):
     def test_str(self):
         tests = (
             (~SearchQuery('a'), '~SearchQuery(a)'),
@@ -460,3 +464,118 @@ class SearchQueryTests(SimpleTestCase):
         for query, expected_str in tests:
             with self.subTest(query=query):
                 self.assertEqual(str(query), expected_str)
+
+
+@modify_settings(INSTALLED_APPS={'append': 'django.contrib.postgres'})
+class SearchHeadlineTests(GrailTestData, PostgreSQLTestCase):
+    def test_headline(self):
+        searched = Line.objects.annotate(
+            headline=SearchHeadline(
+                F('dialogue'),
+                SearchQuery('brave sir robin'),
+                config=SearchConfig('english'),
+            ),
+        ).get(pk=self.verse0.pk)
+        self.assertEqual(
+            searched.headline,
+            '<b>Robin</b>. He was not at all afraid to be killed in nasty '
+            'ways. <b>Brave</b>, <b>brave</b>, <b>brave</b>, <b>brave</b> '
+            '<b>Sir</b> <b>Robin</b>',
+        )
+
+    def test_headline_untyped_args(self):
+        searched = Line.objects.annotate(
+            headline=SearchHeadline('dialogue', 'killed', config='english'),
+        ).get(pk=self.verse0.pk)
+        self.assertEqual(
+            searched.headline,
+            'Robin. He was not at all afraid to be <b>killed</b> in nasty '
+            'ways. Brave, brave, brave, brave Sir Robin!',
+        )
+
+    def test_headline_with_config(self):
+        searched = Line.objects.annotate(
+            headline=SearchHeadline(
+                'dialogue',
+                SearchQuery('cadeaux', config='french'),
+                config='french',
+            ),
+        ).get(pk=self.french.pk)
+        self.assertEqual(
+            searched.headline,
+            'Oh. Un beau <b>cadeau</b>. Oui oui.',
+        )
+
+    def test_headline_with_config_from_field(self):
+        searched = Line.objects.annotate(
+            headline=SearchHeadline(
+                'dialogue',
+                SearchQuery('cadeaux', config=F('dialogue_config')),
+                config=F('dialogue_config'),
+            ),
+        ).get(pk=self.french.pk)
+        self.assertEqual(
+            searched.headline,
+            'Oh. Un beau <b>cadeau</b>. Oui oui.',
+        )
+
+    def test_headline_separator_options(self):
+        searched = Line.objects.annotate(
+            headline=SearchHeadline(
+                'dialogue',
+                'brave sir robin',
+                start_sel='<span>',
+                stop_sel='</span>',
+            ),
+        ).get(pk=self.verse0.pk)
+        self.assertEqual(
+            searched.headline,
+            '<span>Robin</span>. He was not at all afraid to be killed in '
+            'nasty ways. <span>Brave</span>, <span>brave</span>, <span>brave'
+            '</span>, <span>brave</span> <span>Sir</span> <span>Robin</span>',
+        )
+
+    def test_headline_highlight_all_option(self):
+        searched = Line.objects.annotate(
+            headline=SearchHeadline(
+                'dialogue',
+                SearchQuery('brave sir robin', config='english'),
+                highlight_all=True,
+            ),
+        ).get(pk=self.verse0.pk)
+        self.assertIn(
+            '<b>Bravely</b> bold <b>Sir</b> <b>Robin</b>, rode forth from '
+            'Camelot. He was not afraid to die, o ',
+            searched.headline,
+        )
+
+    def test_headline_short_word_option(self):
+        searched = Line.objects.annotate(
+            headline=SearchHeadline(
+                'dialogue',
+                SearchQuery('brave sir robin', config='english'),
+                short_word=6,
+            ),
+        ).get(pk=self.verse0.pk)
+        self.assertIs(searched.headline.endswith(
+            '<b>Brave</b>, <b>brave</b>, <b>brave</b>, <b>brave</b> <b>Sir</b>'
+        ), True)
+
+    def test_headline_fragments_words_options(self):
+        searched = Line.objects.annotate(
+            headline=SearchHeadline(
+                'dialogue',
+                SearchQuery('brave sir robin', config='english'),
+                fragment_delimiter='...<br>',
+                max_fragments=4,
+                max_words=3,
+                min_words=1,
+            ),
+        ).get(pk=self.verse0.pk)
+        self.assertEqual(
+            searched.headline,
+            '<b>Sir</b> <b>Robin</b>, rode...<br>'
+            '<b>Brave</b> <b>Sir</b> <b>Robin</b>...<br>'
+            '<b>Brave</b>, <b>brave</b>, <b>brave</b>...<br>'
+            '<b>brave</b> <b>Sir</b> <b>Robin</b>',
+        )

+ 5 - 1
tests/postgres_tests/test_trigram.py

@@ -1,9 +1,13 @@
-from django.contrib.postgres.search import TrigramDistance, TrigramSimilarity
 from django.test import modify_settings
 
 from . import PostgreSQLTestCase
 from .models import CharFieldModel, TextFieldModel
 
+try:
+    from django.contrib.postgres.search import TrigramDistance, TrigramSimilarity
+except ImportError:
+    pass
+
 
 @modify_settings(INSTALLED_APPS={'append': 'django.contrib.postgres'})
 class TrigramTest(PostgreSQLTestCase):