Browse Source

Fixed #33788 -- Added TrigramStrictWordSimilarity() and TrigramStrictWordDistance() on PostgreSQL.

Matt Brewer 2 years ago
parent
commit
8d160f154f

+ 1 - 0
AUTHORS

@@ -636,6 +636,7 @@ answer newbie questions, and generally made Django that much better:
     Mathieu Agopian <mathieu.agopian@gmail.com>
     Matías Bordese
     Matt Boersma <matt@sprout.org>
+    Matt Brewer <matt.brewer693@gmail.com>
     Matt Croydon <http://www.postneo.com/>
     Matt Deacalion Stevens <matt@dirtymonkey.co.uk>
     Matt Dennenbaum

+ 11 - 1
django/contrib/postgres/apps.py

@@ -11,7 +11,13 @@ from django.db.models.indexes import IndexExpression
 from django.utils.translation import gettext_lazy as _
 
 from .indexes import OpClass
-from .lookups import SearchLookup, TrigramSimilar, TrigramWordSimilar, Unaccent
+from .lookups import (
+    SearchLookup,
+    TrigramSimilar,
+    TrigramStrictWordSimilar,
+    TrigramWordSimilar,
+    Unaccent,
+)
 from .serializers import RangeSerializer
 from .signals import register_type_handlers
 
@@ -37,6 +43,8 @@ def uninstall_if_needed(setting, value, enter, **kwargs):
         TextField._unregister_lookup(TrigramSimilar)
         CharField._unregister_lookup(TrigramWordSimilar)
         TextField._unregister_lookup(TrigramWordSimilar)
+        CharField._unregister_lookup(TrigramStrictWordSimilar)
+        TextField._unregister_lookup(TrigramStrictWordSimilar)
         # Disconnect this receiver until the next time this app is installed
         # and ready() connects it again to prevent unnecessary processing on
         # each setting change.
@@ -73,5 +81,7 @@ class PostgresConfig(AppConfig):
         TextField.register_lookup(TrigramSimilar)
         CharField.register_lookup(TrigramWordSimilar)
         TextField.register_lookup(TrigramWordSimilar)
+        CharField.register_lookup(TrigramStrictWordSimilar)
+        TextField.register_lookup(TrigramStrictWordSimilar)
         MigrationWriter.register_serializer(RANGE_TYPES, RangeSerializer)
         IndexExpression.register_wrappers(OrderBy, OpClass, Collate)

+ 5 - 0
django/contrib/postgres/lookups.py

@@ -63,3 +63,8 @@ class TrigramSimilar(PostgresOperatorLookup):
 class TrigramWordSimilar(PostgresOperatorLookup):
     lookup_name = "trigram_word_similar"
     postgres_operator = "%%>"
+
+
+class TrigramStrictWordSimilar(PostgresOperatorLookup):
+    lookup_name = "trigram_strict_word_similar"
+    postgres_operator = "%%>>"

+ 9 - 0
django/contrib/postgres/search.py

@@ -366,5 +366,14 @@ class TrigramWordDistance(TrigramWordBase):
     arg_joiner = " <<-> "
 
 
+class TrigramStrictWordDistance(TrigramWordBase):
+    function = ""
+    arg_joiner = " <<<-> "
+
+
 class TrigramWordSimilarity(TrigramWordBase):
     function = "WORD_SIMILARITY"
+
+
+class TrigramStrictWordSimilarity(TrigramWordBase):
+    function = "STRICT_WORD_SIMILARITY"

+ 25 - 0
docs/ref/contrib/postgres/lookups.txt

@@ -7,6 +7,9 @@ Trigram similarity
 
 .. fieldlookup:: trigram_similar
 
+``trigram_similar``
+-------------------
+
 The ``trigram_similar`` lookup allows you to perform trigram lookups,
 measuring the number of trigrams (three consecutive characters) shared, using a
 dedicated PostgreSQL extension. A trigram lookup is given an expression and
@@ -27,6 +30,9 @@ The ``trigram_similar`` lookup can be used on
 
 .. fieldlookup:: trigram_word_similar
 
+``trigram_word_similar``
+------------------------
+
 The ``trigram_word_similar`` lookup allows you to perform trigram word
 similarity lookups using a dedicated PostgreSQL extension. It can be
 approximately understood as measuring the greatest number of trigrams shared
@@ -46,6 +52,25 @@ The ``trigram_word_similar`` lookup can be used on
     >>> Sentence.objects.filter(name__trigram_word_similar='Middlesborough')
     ['<Sentence: Gumby rides on the path of Middlesbrough>']
 
+.. fieldlookup:: trigram_strict_word_similar
+
+``trigram_strict_word_similar``
+-------------------------------
+
+.. versionadded:: 4.2
+
+Similar to :lookup:`trigram_word_similar`, except that it forces extent
+boundaries to match word boundaries.
+
+To use it, add ``'django.contrib.postgres'`` to your :setting:`INSTALLED_APPS`
+and activate the `pg_trgm extension`_ on PostgreSQL. You can install the
+extension using the
+:class:`~django.contrib.postgres.operations.TrigramExtension` migration
+operation.
+
+The ``trigram_strict_word_similar`` lookup can be used on
+:class:`~django.db.models.CharField` and :class:`~django.db.models.TextField`.
+
 .. _`pg_trgm extension`: https://www.postgresql.org/docs/current/pgtrgm.html
 
 ``Unaccent``

+ 25 - 3
docs/ref/contrib/postgres/search.txt

@@ -286,9 +286,9 @@ Trigram similarity
 ==================
 
 Another approach to searching is trigram similarity. A trigram is a group of
-three consecutive characters. In addition to the :lookup:`trigram_similar` and
-:lookup:`trigram_word_similar` lookups, you can use a couple of other
-expressions.
+three consecutive characters. In addition to the :lookup:`trigram_similar`,
+:lookup:`trigram_word_similar`, and :lookup:`trigram_strict_word_similar`
+lookups, you can use several other expressions.
 
 To use them, you need to activate the `pg_trgm extension
 <https://www.postgresql.org/docs/current/pgtrgm.html>`_ on PostgreSQL. You can
@@ -334,6 +334,18 @@ Usage example::
     ... ).filter(similarity__gt=0.3).order_by('-similarity')
     [<Author: Katy Stevens>]
 
+``TrigramStrictWordSimilarity``
+-------------------------------
+
+.. class:: TrigramStrictWordSimilarity(string, expression, **extra)
+
+.. versionadded:: 4.2
+
+Accepts a string or expression, and a field name or expression. Returns the
+trigram strict word similarity between the two arguments. Similar to
+:class:`TrigramWordSimilarity() <TrigramWordSimilarity>`, except that it forces
+extent boundaries to match word boundaries.
+
 ``TrigramDistance``
 -------------------
 
@@ -371,3 +383,13 @@ Usage example::
     ...     distance=TrigramWordDistance(test, 'name'),
     ... ).filter(distance__lte=0.7).order_by('distance')
     [<Author: Katy Stevens>]
+
+``TrigramStrictWordDistance``
+-----------------------------
+
+.. class:: TrigramStrictWordDistance(string, expression, **extra)
+
+.. versionadded:: 4.2
+
+Accepts a string or expression, and a field name or expression. Returns the
+trigram strict word distance between the two arguments.

+ 6 - 1
docs/releases/4.2.txt

@@ -65,7 +65,12 @@ Minor features
 :mod:`django.contrib.postgres`
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-* ...
+* The new :lookup:`trigram_strict_word_similar` lookup, and the
+  :class:`TrigramStrictWordSimilarity()
+  <django.contrib.postgres.search.TrigramStrictWordSimilarity>` and
+  :class:`TrigramStrictWordDistance()
+  <django.contrib.postgres.search.TrigramStrictWordDistance>` expressions allow
+  using trigram strict word similarity.
 
 :mod:`django.contrib.redirects`
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

+ 48 - 0
tests/postgres_tests/test_trigram.py

@@ -7,6 +7,8 @@ try:
     from django.contrib.postgres.search import (
         TrigramDistance,
         TrigramSimilarity,
+        TrigramStrictWordDistance,
+        TrigramStrictWordSimilarity,
         TrigramWordDistance,
         TrigramWordSimilarity,
     )
@@ -43,6 +45,25 @@ class TrigramTest(PostgreSQLTestCase):
             self.Model.objects.filter(field__trigram_word_similar="Middlesborough"),
             [obj],
         )
+        self.assertSequenceEqual(
+            self.Model.objects.filter(field__trigram_word_similar="Middle"),
+            [obj],
+        )
+
+    def test_trigram_strict_word_search_matched(self):
+        obj = self.Model.objects.create(
+            field="Gumby rides on the path of Middlesbrough",
+        )
+        self.assertSequenceEqual(
+            self.Model.objects.filter(
+                field__trigram_strict_word_similar="Middlesborough"
+            ),
+            [obj],
+        )
+        self.assertSequenceEqual(
+            self.Model.objects.filter(field__trigram_strict_word_similar="Middle"),
+            [],
+        )
 
     def test_trigram_similarity(self):
         search = "Bat sat on cat."
@@ -75,6 +96,19 @@ class TrigramTest(PostgreSQLTestCase):
             ],
         )
 
+    def test_trigram_strict_word_similarity(self):
+        search = "matt"
+        self.assertSequenceEqual(
+            self.Model.objects.filter(field__trigram_word_similar=search)
+            .annotate(word_similarity=TrigramStrictWordSimilarity(search, "field"))
+            .values("field", "word_similarity")
+            .order_by("-word_similarity"),
+            [
+                {"field": "Cat sat on mat.", "word_similarity": 0.5},
+                {"field": "Matthew", "word_similarity": 0.44444445},
+            ],
+        )
+
     def test_trigram_similarity_alternate(self):
         # Round result of distance because PostgreSQL uses greater precision.
         self.assertQuerysetEqual(
@@ -104,6 +138,20 @@ class TrigramTest(PostgreSQLTestCase):
             ],
         )
 
+    def test_trigram_strict_word_distance(self):
+        self.assertSequenceEqual(
+            self.Model.objects.annotate(
+                word_distance=TrigramStrictWordDistance("matt", "field"),
+            )
+            .filter(word_distance__lte=0.7)
+            .values("field", "word_distance")
+            .order_by("word_distance"),
+            [
+                {"field": "Cat sat on mat.", "word_distance": 0.5},
+                {"field": "Matthew", "word_distance": 0.5555556},
+            ],
+        )
+
 
 class TrigramTextFieldTest(TrigramTest):
     """