Browse Source

Fixed #32492 -- Added TrigramWordSimilarity() and TrigramWordDistance() on PostgreSQL.

Nikita Marchant 3 years ago
parent
commit
4e4082f939

+ 1 - 0
AUTHORS

@@ -710,6 +710,7 @@ answer newbie questions, and generally made Django that much better:
     Nicola Larosa <nico@teknico.net>
     Nicolas Lara <nicolaslara@gmail.com>
     Nicolas Noé <nicolas@niconoe.eu>
+    Nikita Marchant <nikita.marchant@gmail.com>
     Niran Babalola <niran@niran.org>
     Nis Jørgensen <nis@superlativ.dk>
     Nowell Strite <https://nowell.strite.org/>

+ 5 - 1
django/contrib/postgres/apps.py

@@ -13,7 +13,7 @@ from django.test.signals import setting_changed
 from django.utils.translation import gettext_lazy as _
 
 from .indexes import OpClass
-from .lookups import SearchLookup, TrigramSimilar, Unaccent
+from .lookups import SearchLookup, TrigramSimilar, TrigramWordSimilar, Unaccent
 from .serializers import RangeSerializer
 from .signals import register_type_handlers
 
@@ -33,6 +33,8 @@ def uninstall_if_needed(setting, value, enter, **kwargs):
         TextField._unregister_lookup(SearchLookup)
         CharField._unregister_lookup(TrigramSimilar)
         TextField._unregister_lookup(TrigramSimilar)
+        CharField._unregister_lookup(TrigramWordSimilar)
+        TextField._unregister_lookup(TrigramWordSimilar)
         # Disconnect this receiver until the next time this app is installed
         # and ready() connects it again to prevent unnecessary processing on
         # each setting change.
@@ -65,5 +67,7 @@ class PostgresConfig(AppConfig):
         TextField.register_lookup(SearchLookup)
         CharField.register_lookup(TrigramSimilar)
         TextField.register_lookup(TrigramSimilar)
+        CharField.register_lookup(TrigramWordSimilar)
+        TextField.register_lookup(TrigramWordSimilar)
         MigrationWriter.register_serializer(RANGE_TYPES, RangeSerializer)
         IndexExpression.register_wrappers(OrderBy, OpClass, Collate)

+ 5 - 0
django/contrib/postgres/lookups.py

@@ -58,3 +58,8 @@ class SearchLookup(SearchVectorExact):
 class TrigramSimilar(PostgresOperatorLookup):
     lookup_name = 'trigram_similar'
     postgres_operator = '%%'
+
+
+class TrigramWordSimilar(PostgresOperatorLookup):  # word-level sibling of TrigramSimilar
+    lookup_name = 'trigram_word_similar'  # used as <field>__trigram_word_similar in filters
+    postgres_operator = '%%>'  # '%' doubled as in TrigramSimilar; presumably renders as %> -- confirm

+ 18 - 0
django/contrib/postgres/search.py

@@ -293,6 +293,15 @@ class TrigramBase(Func):
         super().__init__(expression, string, **extra)
 
 
+class TrigramWordBase(Func):  # shared base for the trigram *word* expressions below
+    output_field = FloatField()  # annotations built on this base are floats
+
+    def __init__(self, string, expression, **extra):  # string first; TrigramBase passes (expression, string)
+        if not hasattr(string, 'resolve_expression'):  # not an expression object, i.e. a plain value
+            string = Value(string)  # wrap the plain value in Value()
+        super().__init__(string, expression, **extra)  # argument order is kept: string, then expression
+
+
 class TrigramSimilarity(TrigramBase):
     function = 'SIMILARITY'
 
@@ -300,3 +309,12 @@ class TrigramSimilarity(TrigramBase):
 class TrigramDistance(TrigramBase):
     function = ''
     arg_joiner = ' <-> '
+
+
+class TrigramWordDistance(TrigramWordBase):  # documented as the trigram word distance of its two arguments
+    function = ''  # no SQL function name, same as TrigramDistance
+    arg_joiner = ' <<-> '  # operator placed between string and expression (TrigramDistance uses ' <-> ')
+
+
+class TrigramWordSimilarity(TrigramWordBase):  # documented as the trigram word similarity of its two arguments
+    function = 'WORD_SIMILARITY'  # SQL function name; TrigramSimilarity uses 'SIMILARITY'

+ 27 - 3
docs/ref/contrib/postgres/lookups.txt

@@ -14,9 +14,8 @@ returns results that have a similarity measurement greater than the current
 similarity threshold.
 
 To use it, add ``'django.contrib.postgres'`` in your :setting:`INSTALLED_APPS`
-and activate the `pg_trgm extension
-<https://www.postgresql.org/docs/current/pgtrgm.html>`_ on PostgreSQL. You can
-install the extension using the
+and activate the `pg_trgm extension`_ on PostgreSQL. You can install the
+extension using the
 :class:`~django.contrib.postgres.operations.TrigramExtension` migration
 operation.
 
@@ -26,6 +25,31 @@ The ``trigram_similar`` lookup can be used on
     >>> City.objects.filter(name__trigram_similar="Middlesborough")
     ['<City: Middlesbrough>']
 
+.. fieldlookup:: trigram_word_similar
+
+.. versionadded:: 4.0
+
+The ``trigram_word_similar`` lookup allows you to perform trigram word
+similarity lookups using a dedicated PostgreSQL extension. It can be
+approximately understood as measuring the greatest number of trigrams shared
+between the parameter and any substring of the field. A trigram word lookup is
+given an expression and returns results that have a word similarity measurement
+greater than the current word similarity threshold.
+
+To use it, add ``'django.contrib.postgres'`` in your :setting:`INSTALLED_APPS`
+and activate the `pg_trgm extension`_ on PostgreSQL. You can install the
+extension using the
+:class:`~django.contrib.postgres.operations.TrigramExtension` migration
+operation.
+
+The ``trigram_word_similar`` lookup can be used on
+:class:`~django.db.models.CharField` and :class:`~django.db.models.TextField`::
+
+    >>> Sentence.objects.filter(name__trigram_word_similar='Middlesborough')
+    ['<Sentence: Gumby rides on the path of Middlesbrough>']
+
+.. _`pg_trgm extension`: https://www.postgresql.org/docs/current/pgtrgm.html
+
 ``Unaccent``
 ============
 

+ 45 - 2
docs/ref/contrib/postgres/search.txt

@@ -280,8 +280,9 @@ Trigram similarity
 ==================
 
 Another approach to searching is trigram similarity. A trigram is a group of
-three consecutive characters. In addition to the :lookup:`trigram_similar`
-lookup, you can use a couple of other expressions.
+three consecutive characters. In addition to the :lookup:`trigram_similar` and
+:lookup:`trigram_word_similar` lookups, you can use a couple of other
+expressions.
 
 To use them, you need to activate the `pg_trgm extension
 <https://www.postgresql.org/docs/current/pgtrgm.html>`_ on PostgreSQL. You can
@@ -308,6 +309,27 @@ Usage example::
     ... ).filter(similarity__gt=0.3).order_by('-similarity')
     [<Author: Katy Stevens>, <Author: Stephen Keats>]
 
+``TrigramWordSimilarity``
+-------------------------
+
+.. versionadded:: 4.0
+
+.. class:: TrigramWordSimilarity(string, expression, **extra)
+
+Accepts a string or expression, and a field name or expression. Returns the
+trigram word similarity between the two arguments.
+
+Usage example::
+
+    >>> from django.contrib.postgres.search import TrigramWordSimilarity
+    >>> Author.objects.create(name='Katy Stevens')
+    >>> Author.objects.create(name='Stephen Keats')
+    >>> test = 'Kat'
+    >>> Author.objects.annotate(
+    ...     similarity=TrigramWordSimilarity(test, 'name'),
+    ... ).filter(similarity__gt=0.3).order_by('-similarity')
+    [<Author: Katy Stevens>]
+
 ``TrigramDistance``
 -------------------
 
@@ -326,3 +348,24 @@ Usage example::
     ...     distance=TrigramDistance('name', test),
     ... ).filter(distance__lte=0.7).order_by('distance')
     [<Author: Katy Stevens>, <Author: Stephen Keats>]
+
+``TrigramWordDistance``
+-----------------------
+
+.. versionadded:: 4.0
+
+.. class:: TrigramWordDistance(string, expression, **extra)
+
+Accepts a string or expression, and a field name or expression. Returns the
+trigram word distance between the two arguments.
+
+Usage example::
+
+    >>> from django.contrib.postgres.search import TrigramWordDistance
+    >>> Author.objects.create(name='Katy Stevens')
+    >>> Author.objects.create(name='Stephen Keats')
+    >>> test = 'Kat'
+    >>> Author.objects.annotate(
+    ...     distance=TrigramWordDistance(test, 'name'),
+    ... ).filter(distance__lte=0.7).order_by('distance')
+    [<Author: Katy Stevens>]

+ 7 - 0
docs/releases/4.0.txt

@@ -200,6 +200,13 @@ Minor features
   expression allows using subqueries to construct lists of values on
   PostgreSQL.
 
+* The new :lookup:`trigram_word_similar` lookup, and the
+  :class:`TrigramWordDistance()
+  <django.contrib.postgres.search.TrigramWordDistance>` and
+  :class:`TrigramWordSimilarity()
+  <django.contrib.postgres.search.TrigramWordSimilarity>` expressions allow
+  using trigram word similarity.
+
 :mod:`django.contrib.redirects`
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 

+ 1 - 1
tests/postgres_tests/migrations/0002_create_test_models.py

@@ -110,7 +110,7 @@ class Migration(migrations.Migration):
             name='CharFieldModel',
             fields=[
                 ('id', models.AutoField(verbose_name='ID', serialize=False, auto_created=True, primary_key=True)),
-                ('field', models.CharField(max_length=16)),
+                ('field', models.CharField(max_length=64)),
             ],
             options=None,
             bases=None,

+ 1 - 1
tests/postgres_tests/models.py

@@ -83,7 +83,7 @@ class ArrayEnumModel(PostgreSQLModel):
 
 
 class CharFieldModel(models.Model):
-    field = models.CharField(max_length=16)
+    field = models.CharField(max_length=64)
 
 
 class TextFieldModel(models.Model):

+ 38 - 1
tests/postgres_tests/test_trigram.py

@@ -5,7 +5,8 @@ from .models import CharFieldModel, TextFieldModel
 
 try:
     from django.contrib.postgres.search import (
-        TrigramDistance, TrigramSimilarity,
+        TrigramDistance, TrigramSimilarity, TrigramWordDistance,
+        TrigramWordSimilarity,
     )
 except ImportError:
     pass
@@ -30,6 +31,15 @@ class TrigramTest(PostgreSQLTestCase):
             transform=lambda instance: instance.field,
         )
 
+    def test_trigram_word_search(self):  # lookup should match a misspelled word inside a longer sentence
+        obj = self.Model.objects.create(
+            field='Gumby rides on the path of Middlesbrough',  # stored spelling: Middlesbrough
+        )
+        self.assertSequenceEqual(
+            self.Model.objects.filter(field__trigram_word_similar='Middlesborough'),  # query spelling differs
+            [obj],  # only the row created above is expected
+        )
+
     def test_trigram_similarity(self):
         search = 'Bat sat on cat.'
         # Round result of similarity because PostgreSQL 12+ uses greater
@@ -43,6 +53,20 @@ class TrigramTest(PostgreSQLTestCase):
             ordered=True,
         )
 
+    def test_trigram_word_similarity(self):  # lookup and TrigramWordSimilarity annotation used together
+        search = 'mat'
+        self.assertSequenceEqual(
+            self.Model.objects.filter(
+                field__trigram_word_similar=search,  # keep only rows the lookup matches
+            ).annotate(
+                word_similarity=TrigramWordSimilarity(search, 'field'),  # search string first, then the field name
+            ).values('field', 'word_similarity').order_by('-word_similarity'),  # highest similarity first
+            [  # rows presumably come from fixtures set up outside this hunk -- confirm
+                {'field': 'Cat sat on mat.', 'word_similarity': 1.0},
+                {'field': 'Matthew', 'word_similarity': 0.75},
+            ],
+        )
+
     def test_trigram_similarity_alternate(self):
         # Round result of distance because PostgreSQL 12+ uses greater
         # precision.
@@ -55,6 +79,19 @@ class TrigramTest(PostgreSQLTestCase):
             ordered=True,
         )
 
+    def test_trigram_word_similarity_alternate(self):  # despite the name, this exercises TrigramWordDistance
+        self.assertSequenceEqual(
+            self.Model.objects.annotate(
+                word_distance=TrigramWordDistance('mat', 'field'),  # string first, then the field name
+            ).filter(
+                word_distance__lte=0.7,  # keep rows at distance 0.7 or less
+            ).values('field', 'word_distance').order_by('word_distance'),  # smallest distance first
+            [
+                {'field': 'Cat sat on mat.', 'word_distance': 0},  # 1.0 similarity in test_trigram_word_similarity
+                {'field': 'Matthew', 'word_distance': 0.25},  # 0.75 similarity in that test (1 - 0.75)
+            ],
+        )
+
 
 class TrigramTextFieldTest(TrigramTest):
     """