Browse Source

Fixed #24938 -- Added PostgreSQL trigram support.

Matthew Somerville 9 years ago
parent
commit
1962a96a30

+ 3 - 1
django/contrib/postgres/apps.py

@@ -3,7 +3,7 @@ from django.db.backends.signals import connection_created
 from django.db.models import CharField, TextField
 from django.utils.translation import ugettext_lazy as _
 
-from .lookups import SearchLookup, Unaccent
+from .lookups import SearchLookup, TrigramSimilar, Unaccent
 from .signals import register_hstore_handler
 
 
@@ -17,3 +17,5 @@ class PostgresConfig(AppConfig):
         TextField.register_lookup(Unaccent)
         CharField.register_lookup(SearchLookup)
         TextField.register_lookup(SearchLookup)
+        CharField.register_lookup(TrigramSimilar)
+        TextField.register_lookup(TrigramSimilar)

+ 5 - 0
django/contrib/postgres/lookups.py

@@ -60,3 +60,8 @@ class SearchLookup(SearchVectorExact):
             self.lhs = SearchVector(self.lhs)
         lhs, lhs_params = super(SearchLookup, self).process_lhs(qn, connection)
         return lhs, lhs_params
+
+
+class TrigramSimilar(PostgresSimpleLookup):
+    lookup_name = 'trigram_similar'
+    operator = '%%'

+ 6 - 0
django/contrib/postgres/operations.py

@@ -40,3 +40,9 @@ class UnaccentExtension(CreateExtension):
 
     def __init__(self):
         self.name = 'unaccent'
+
+
+class TrigramExtension(CreateExtension):
+
+    def __init__(self):
+        self.name = 'pg_trgm'

+ 16 - 0
django/contrib/postgres/search.py

@@ -185,3 +185,19 @@ class SearchRank(Func):
 
 
 SearchVectorField.register_lookup(SearchVectorExact)
+
+
+class TrigramBase(Func):
+    def __init__(self, expression, string, **extra):
+        if not hasattr(string, 'resolve_expression'):
+            string = Value(string)
+        super(TrigramBase, self).__init__(expression, string, output_field=FloatField(), **extra)
+
+
+class TrigramSimilarity(TrigramBase):
+    function = 'SIMILARITY'
+
+
+class TrigramDistance(TrigramBase):
+    function = ''
+    arg_joiner = ' <-> '

+ 26 - 0
docs/ref/contrib/postgres/lookups.txt

@@ -2,6 +2,32 @@
 PostgreSQL specific lookups
 ===========================
 
+Trigram similarity
+==================
+
+.. fieldlookup:: trigram_similar
+
+.. versionadded:: 1.10
+
+The ``trigram_similar`` lookup allows you to perform trigram lookups,
+measuring the number of trigrams (three consecutive characters) shared, using a
+dedicated PostgreSQL extension. A trigram lookup is given an expression and
+returns results that have a similarity measurement greater than the current
+similarity threshold.
+
+To use it, add ``'django.contrib.postgres'`` to your :setting:`INSTALLED_APPS`
+and activate the `pg_trgm extension
+<http://www.postgresql.org/docs/current/interactive/pgtrgm.html>`_ on
+PostgreSQL. You can install the extension using the
+:class:`~django.contrib.postgres.operations.TrigramExtension` migration
+operation.
+
+The ``trigram_similar`` lookup can be used on
+:class:`~django.db.models.CharField` and :class:`~django.db.models.TextField`::
+
+    >>> City.objects.filter(name__trigram_similar="Middlesborough")
+    [<City: Middlesbrough>]
+
 ``Unaccent``
 ============
 

+ 10 - 0
docs/ref/contrib/postgres/operations.txt

@@ -27,6 +27,16 @@ the ``django.contrib.postgres.operations`` module.
     which will install the ``hstore`` extension and also immediately set up the
     connection to interpret hstore data.
 
+``TrigramExtension``
+====================
+
+.. class:: TrigramExtension()
+
+    .. versionadded:: 1.10
+
+    A subclass of :class:`~django.contrib.postgres.operations.CreateExtension`
+    that installs the ``pg_trgm`` extension.
+
 ``UnaccentExtension``
 =====================
 

+ 55 - 0
docs/ref/contrib/postgres/search.txt

@@ -189,3 +189,58 @@ if it were an annotated ``SearchVector``::
     [<Entry: Cheese on Toast recipes>, <Entry: Pizza recipes>]
 
 .. _PostgreSQL documentation: http://www.postgresql.org/docs/current/static/textsearch-features.html#TEXTSEARCH-UPDATE-TRIGGERS
+
+Trigram similarity
+==================
+
+Another approach to searching is trigram similarity. A trigram is a group of
+three consecutive characters. In addition to the :lookup:`trigram_similar`
+lookup, you can use a couple of other expressions.
+
+To use them, you need to activate the `pg_trgm extension
+<http://www.postgresql.org/docs/current/interactive/pgtrgm.html>`_ on
+PostgreSQL. You can install it using the
+:class:`~django.contrib.postgres.operations.TrigramExtension` migration
+operation.
+
+``TrigramSimilarity``
+---------------------
+
+.. class:: TrigramSimilarity(expression, string, **extra)
+
+.. versionadded:: 1.10
+
+Accepts a field name or expression, and a string or expression. Returns the
+trigram similarity between the two arguments.
+
+Usage example::
+
+    >>> from django.contrib.postgres.search import TrigramSimilarity
+    >>> Author.objects.create(name='Katy Stevens')
+    >>> Author.objects.create(name='Stephen Keats')
+    >>> test = 'Katie Stephens'
+    >>> Author.objects.annotate(
+    ...     similarity=TrigramSimilarity('name', test),
+    ... ).filter(similarity__gt=0.3).order_by('-similarity')
+    [<Author: Katy Stevens>, <Author: Stephen Keats>]
+
+``TrigramDistance``
+-------------------
+
+.. class:: TrigramDistance(expression, string, **extra)
+
+.. versionadded:: 1.10
+
+Accepts a field name or expression, and a string or expression. Returns the
+trigram distance between the two arguments.
+
+Usage example::
+
+    >>> from django.contrib.postgres.search import TrigramDistance
+    >>> Author.objects.create(name='Katy Stevens')
+    >>> Author.objects.create(name='Stephen Keats')
+    >>> test = 'Katie Stephens'
+    >>> Author.objects.annotate(
+    ...     distance=TrigramDistance('name', test),
+    ... ).filter(distance__lte=0.7).order_by('distance')
+    [<Author: Katy Stevens>, <Author: Stephen Keats>]

+ 4 - 0
docs/releases/1.10.txt

@@ -33,6 +33,10 @@ search engine. You can search across multiple fields in your relational
 database, combine the searches with other lookups, use different language
 configurations and weightings, and rank the results by relevance.
 
+It also now includes trigram support, using the :lookup:`trigram_similar`
+lookup, and the :class:`~django.contrib.postgres.search.TrigramSimilarity` and
+:class:`~django.contrib.postgres.search.TrigramDistance` expressions.
+
 Minor features
 --------------
 

+ 3 - 2
docs/topics/db/search.txt

@@ -55,11 +55,12 @@ use :lookup:`unaccented comparison <unaccent>`::
 This shows another issue, where we are matching against a different spelling of
 the name. In this case we have an asymmetry though - a search for ``Helen``
 will pick up ``Helena`` or ``Hélène``, but not the reverse. Another option
-would be to use a trigram comparison, which compares sequences of letters.
+would be to use a :lookup:`trigram_similar` comparison, which compares
+sequences of letters.
 
 For example::
 
-    >>> Author.objects.filter(name__unaccent__lower__trigram='Hélène')
+    >>> Author.objects.filter(name__unaccent__lower__trigram_similar='Hélène')
     [<Author: Helen Mirren>, <Author: Hélène Joy>]
 
 Now we have a different problem - the longer name of "Helena Bonham Carter"

+ 3 - 1
tests/postgres_tests/migrations/0001_setup_extensions.py

@@ -5,12 +5,13 @@ from django.db import migrations
 
 try:
     from django.contrib.postgres.operations import (
-        CreateExtension, HStoreExtension, UnaccentExtension,
+        CreateExtension, HStoreExtension, TrigramExtension, UnaccentExtension,
     )
 except ImportError:
     from django.test import mock
     CreateExtension = mock.Mock()
     HStoreExtension = mock.Mock()
+    TrigramExtension = mock.Mock()
     UnaccentExtension = mock.Mock()
 
 
@@ -21,5 +22,6 @@ class Migration(migrations.Migration):
         # dash in its name.
         CreateExtension('uuid-ossp'),
         HStoreExtension(),
+        TrigramExtension(),
         UnaccentExtension(),
     ]

+ 53 - 0
tests/postgres_tests/test_trigram.py

@@ -0,0 +1,53 @@
+from django.contrib.postgres.search import TrigramDistance, TrigramSimilarity
+from django.test import modify_settings
+
+from . import PostgreSQLTestCase
+from .models import CharFieldModel, TextFieldModel
+
+
+@modify_settings(INSTALLED_APPS={'append': 'django.contrib.postgres'})
+class TrigramTest(PostgreSQLTestCase):
+    Model = CharFieldModel
+
+    @classmethod
+    def setUpTestData(cls):
+        cls.Model.objects.bulk_create([
+            cls.Model(field='Matthew'),
+            cls.Model(field='Cat sat on mat.'),
+            cls.Model(field='Dog sat on rug.'),
+        ])
+
+    def test_trigram_search(self):
+        self.assertQuerysetEqual(
+            self.Model.objects.filter(field__trigram_similar='Mathew'),
+            ['Matthew'],
+            transform=lambda instance: instance.field,
+        )
+
+    def test_trigram_similarity(self):
+        search = 'Bat sat on cat.'
+        self.assertQuerysetEqual(
+            self.Model.objects.filter(
+                field__trigram_similar=search,
+            ).annotate(similarity=TrigramSimilarity('field', search)).order_by('-similarity'),
+            [('Cat sat on mat.', 0.625), ('Dog sat on rug.', 0.333333)],
+            transform=lambda instance: (instance.field, instance.similarity),
+            ordered=True,
+        )
+
+    def test_trigram_similarity_alternate(self):
+        self.assertQuerysetEqual(
+            self.Model.objects.annotate(
+                distance=TrigramDistance('field', 'Bat sat on cat.'),
+            ).filter(distance__lte=0.7).order_by('distance'),
+            [('Cat sat on mat.', 0.375), ('Dog sat on rug.', 0.666667)],
+            transform=lambda instance: (instance.field, instance.distance),
+            ordered=True,
+        )
+
+
+class TrigramTextFieldTest(TrigramTest):
+    """
+    TextField has the same behavior as CharField regarding trigram lookups.
+    """
+    Model = TextFieldModel