Explorar o código

Fixed #23423 -- Added unaccent lookup in django.contrib.postgres

Thomas Chaumeny hai 10 anos
pai
achega
17fe0bd808

+ 1 - 0
AUTHORS

@@ -642,6 +642,7 @@ answer newbie questions, and generally made Django that much better:
     Terry Huang <terryh.tp@gmail.com>
     thebjorn <bp@datakortet.no>
     Thejaswi Puthraya <thejaswi.puthraya@gmail.com>
+    Thomas Chaumeny <t.chaumeny@gmail.com>
     Thomas Güttler <hv@tbz-pariv.de>
     Thomas Kerpe <thomas@kerpe.net>
     Thomas Sorrel

+ 4 - 0
django/contrib/postgres/apps.py

@@ -1,7 +1,9 @@
 from django.apps import AppConfig
 from django.db.backends.signals import connection_created
+from django.db.models import CharField, TextField
 from django.utils.translation import ugettext_lazy as _
 
+from .lookups import Unaccent
 from .signals import register_hstore_handler
 
 
@@ -11,3 +13,5 @@ class PostgresConfig(AppConfig):
 
     def ready(self):
         connection_created.connect(register_hstore_handler)
+        CharField.register_lookup(Unaccent)
+        TextField.register_lookup(Unaccent)

+ 10 - 0
django/contrib/postgres/lookups.py

@@ -0,0 +1,10 @@
+from django.db.models import Transform
+
+
+class Unaccent(Transform):
+    bilateral = True
+    lookup_name = 'unaccent'
+
+    def as_postgresql(self, compiler, connection):
+        lhs, params = compiler.compile(self.lhs)
+        return "UNACCENT(%s)" % lhs, params

+ 6 - 0
django/contrib/postgres/operations.py

@@ -32,3 +32,9 @@ class HStoreExtension(CreateExtension):
         # extension is installed, a subsequent data migration would use the
         # same connection
         register_hstore_handler(schema_editor.connection)
+
+
+class UnaccentExtension(CreateExtension):
+
+    def __init__(self):
+        self.name = 'unaccent'

+ 1 - 0
docs/ref/contrib/postgres/index.txt

@@ -26,5 +26,6 @@ a number of PostgreSQL specific data types.
 
     fields
     forms
+    lookups
     operations
     validators

+ 36 - 0
docs/ref/contrib/postgres/lookups.txt

@@ -0,0 +1,36 @@
+===========================
+PostgreSQL specific lookups
+===========================
+
+Unaccent
+========
+
+.. fieldlookup:: unaccent
+
+The ``unaccent`` lookup allows you to perform accent-insensitive lookups using
+a dedicated PostgreSQL extension.
+
+This lookup is implemented using :class:`~django.db.models.Transform`, so it
+can be chained with other lookup functions. To use it, you need to add
+``'django.contrib.postgres'`` in your :setting:`INSTALLED_APPS` and activate
+the `unaccent extension on PostgreSQL`_. The
+:class:`~django.contrib.postgres.operations.UnaccentExtension` migration
+operation is available if you want to perform this activation using migrations.
+
+.. _unaccent extension on PostgreSQL: http://www.postgresql.org/docs/current/interactive/unaccent.html
+
+The ``unaccent`` lookup can be used on
+:class:`~django.db.models.CharField` and :class:`~django.db.models.TextField`::
+
+    >>> City.objects.filter(name__unaccent="México")
+    ['<City: Mexico>']
+
+    >>> User.objects.filter(first_name__unaccent__startswith="Jerem")
+    ['<User: Jeremy>', '<User: Jérémy>', '<User: Jérémie>', '<User: Jeremie>']
+
+.. warning::
+
+    ``unaccent`` lookups should perform fine in most use cases. However, queries
+    using this filter will generally perform full table scans, which can be slow
+    on large tables. In those cases, using dedicated full text indexing tools
+    might be appropriate.

+ 8 - 0
docs/ref/contrib/postgres/operations.txt

@@ -25,3 +25,11 @@ HStoreExtension
     A subclass of :class:`~django.contrib.postgres.operations.CreateExtension`
     which will install the ``hstore`` extension and also immediately set up the
     connection to interpret hstore data.
+
+UnaccentExtension
+-----------------
+
+.. class:: UnaccentExtension()
+
+    A subclass of :class:`~django.contrib.postgres.operations.CreateExtension`
+    which will install the ``unaccent`` extension.

+ 4 - 3
docs/releases/1.8.txt

@@ -45,9 +45,10 @@ New PostgreSQL specific functionality
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
 Django now has a module with extensions for PostgreSQL specific features, such
-as :class:`~django.contrib.postgres.fields.ArrayField` and
-:class:`~django.contrib.postgres.fields.HStoreField`. A full breakdown of the
-features is available :doc:`in the documentation</ref/contrib/postgres/index>`.
+as :class:`~django.contrib.postgres.fields.ArrayField`,
+:class:`~django.contrib.postgres.fields.HStoreField`, and the :lookup:`unaccent`
+lookup. A full breakdown of the features is available :doc:`in the
+documentation </ref/contrib/postgres/index>`.
 
 New data types
 ~~~~~~~~~~~~~~

+ 2 - 1
tests/postgres_tests/migrations/0001_setup_extensions.py

@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 from __future__ import unicode_literals
 
-from django.contrib.postgres.operations import HStoreExtension
+from django.contrib.postgres.operations import HStoreExtension, UnaccentExtension
 from django.db import migrations
 
 
@@ -12,4 +12,5 @@ class Migration(migrations.Migration):
 
     operations = [
         HStoreExtension(),
+        UnaccentExtension(),
     ]

+ 18 - 0
tests/postgres_tests/migrations/0002_create_test_models.py

@@ -73,4 +73,22 @@ class Migration(migrations.Migration):
             },
             bases=(models.Model,),
         ),
+        migrations.CreateModel(
+            name='CharFieldModel',
+            fields=[
+                ('id', models.AutoField(verbose_name='ID', serialize=False, auto_created=True, primary_key=True)),
+                ('field', models.CharField(max_length=16)),
+            ],
+            options=None,
+            bases=None,
+        ),
+        migrations.CreateModel(
+            name='TextFieldModel',
+            fields=[
+                ('id', models.AutoField(verbose_name='ID', serialize=False, auto_created=True, primary_key=True)),
+                ('field', models.TextField()),
+            ],
+            options=None,
+            bases=None,
+        ),
     ]

+ 8 - 0
tests/postgres_tests/models.py

@@ -24,3 +24,11 @@ class NestedIntegerArrayModel(models.Model):
 
 class HStoreModel(models.Model):
     field = HStoreField(blank=True, null=True)
+
+
+class CharFieldModel(models.Model):
+    field = models.CharField(max_length=16)
+
+
+class TextFieldModel(models.Model):
+    field = models.TextField()

+ 65 - 0
tests/postgres_tests/test_unaccent.py

@@ -0,0 +1,65 @@
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+
+import unittest
+
+from django.db import connection
+from django.test import TestCase, modify_settings
+
+from .models import CharFieldModel, TextFieldModel
+
+
+@unittest.skipUnless(connection.vendor == 'postgresql', 'PostgreSQL required')
+@modify_settings(INSTALLED_APPS={'append': 'django.contrib.postgres'})
+class UnaccentTest(TestCase):
+
+    Model = CharFieldModel
+
+    def setUp(self):
+        self.Model.objects.bulk_create([
+            self.Model(field="àéÖ"),
+            self.Model(field="aeO"),
+            self.Model(field="aeo"),
+        ])
+
+    def test_unaccent(self):
+        self.assertQuerysetEqual(
+            self.Model.objects.filter(field__unaccent="aeO"),
+            ["àéÖ", "aeO"],
+            transform=lambda instance: instance.field,
+            ordered=False
+        )
+
+    def test_unaccent_chained(self):
+        """
+        Check that unaccent can be used chained with a lookup (which should be
+        the case since unaccent implements the Transform API)
+        """
+        self.assertQuerysetEqual(
+            self.Model.objects.filter(field__unaccent__iexact="aeO"),
+            ["àéÖ", "aeO", "aeo"],
+            transform=lambda instance: instance.field,
+            ordered=False
+        )
+        self.assertQuerysetEqual(
+            self.Model.objects.filter(field__unaccent__endswith="éÖ"),
+            ["àéÖ", "aeO"],
+            transform=lambda instance: instance.field,
+            ordered=False
+        )
+
+    def test_unaccent_accentuated_needle(self):
+        self.assertQuerysetEqual(
+            self.Model.objects.filter(field__unaccent="aéÖ"),
+            ["àéÖ", "aeO"],
+            transform=lambda instance: instance.field,
+            ordered=False
+        )
+
+
+class UnaccentTextFieldTest(UnaccentTest):
+    """
+    TextField should have the exact same behavior as CharField
+    regarding unaccent lookups.
+    """
+    Model = TextFieldModel