Browse Source

Allow frontend caching backends to limit which hostnames they respond to

Jake Howard 11 months ago
parent
commit
c00c2b684f

+ 2 - 0
docs/advanced_topics/performance.md

@@ -68,6 +68,8 @@ Many websites use a frontend cache such as Varnish, Squid, Cloudflare or CloudFr
 
 Wagtail supports being [integrated](frontend_cache_purging) with many CDNs, so it can inform them when a page changes, so the cache can be cleared immediately and users see the changes sooner.
 
+If you have multiple frontends configured (for example, Cloudflare for one site and CloudFront for another), it's recommended to set the [`HOSTNAMES`](frontendcache_multiple_backends) key on each backend to the list of hostnames that backend can purge, to prevent unnecessary extra purge requests.
+
 ## Page URLs
 
 To fully resolve the URL of a page, Wagtail requires information from a few different sources.

+ 50 - 18
docs/reference/contrib/frontendcache.md

@@ -121,24 +121,6 @@ Configuration of credentials can done in multiple ways. You won't need to store
 }
 ```
 
-In case you run multiple sites with Wagtail and each site has its CloudFront distribution, provide a mapping instead of a single distribution. Make sure the mapping matches with the hostnames provided in your site settings.
-
-```python
-WAGTAILFRONTENDCACHE = {
-    'cloudfront': {
-        'BACKEND': 'wagtail.contrib.frontend_cache.backends.CloudfrontBackend',
-        'DISTRIBUTION_ID': {
-            'www.wagtail.org': 'your-distribution-id',
-            'www.madewithwagtail.org': 'your-distribution-id',
-        },
-    },
-}
-```
-
-```{note}
-In most cases, absolute URLs with ``www`` prefixed domain names should be used in your mapping. Only drop the ``www`` prefix if you're absolutely sure you're not using it (for example a subdomain).
-```
-
 ### Azure CDN
 
 With [Azure CDN](https://azure.microsoft.com/en-gb/products/cdn/) you will need a CDN profile with an endpoint configured.
@@ -236,6 +218,56 @@ WAGTAILFRONTENDCACHE = {
 
 Another option that can be set is `SUBSCRIPTION_ID`. By default the first encountered subscription will be used, but if your credential has access to more subscriptions, you should set this to an explicit value.
 
+(frontendcache_multiple_backends)=
+
+## Multiple backends
+
+Multiple backends can be configured by adding multiple entries in `WAGTAILFRONTENDCACHE`.
+
+By default, a backend will attempt to handle every invalidation request, regardless of hostname. To restrict a backend to certain hostnames, specify them in `HOSTNAMES`:
+
+```python
+WAGTAILFRONTENDCACHE = {
+    'main-site': {
+        'BACKEND': 'wagtail.contrib.frontend_cache.backends.HTTPBackend',
+        'LOCATION': 'http://localhost:8000',
+        'HOSTNAMES': ['example.com']
+    },
+    'cdn': {
+        'BACKEND': 'wagtail.contrib.frontend_cache.backends.CloudflareBackend',
+        'BEARER_TOKEN': 'your cloudflare bearer token',
+        'ZONEID': 'your cloudflare domain zone id',
+        'HOSTNAMES': ['cdn.example.com']
+    },
+}
+```
+
+In the above example, `cdn.example.com/foo` will be invalidated by the `cdn` (Cloudflare) backend, whilst `example.com/foo` will be invalidated by the `main-site` backend. This allows different configuration to be used for each backend, for example by changing the `ZONEID` for the Cloudflare backend:
+
+```python
+
+WAGTAILFRONTENDCACHE = {
+    'main-site': {
+        'BACKEND': 'wagtail.contrib.frontend_cache.backends.CloudflareBackend',
+        'BEARER_TOKEN': os.environ["CLOUDFLARE_BEARER_TOKEN"],
+        'ZONEID': 'example.com zone id',
+        'HOSTNAMES': ['example.com']
+    },
+    'other-site': {
+        'BACKEND': 'wagtail.contrib.frontend_cache.backends.CloudflareBackend',
+        'BEARER_TOKEN': os.environ["CLOUDFLARE_BEARER_TOKEN"],
+        'ZONEID': 'example.net zone id',
+        'HOSTNAMES': ['example.net']
+    },
+}
+```
+
+```{note}
+In most cases, ``www`` prefixed domain names should be used in your `HOSTNAMES` list. Only drop the ``www`` prefix if you're absolutely sure you're not using it (for example a subdomain).
+```
+
+Much like Django's `ALLOWED_HOSTS`, values in `HOSTNAMES` starting with a `.` can be used as a subdomain wildcard (for example, `.example.com` matches `example.com` and any of its subdomains).
+
 ## Advanced usage
 
 ### Invalidating more than one URL per page

+ 32 - 1
wagtail/contrib/frontend_cache/backends.py

@@ -4,11 +4,14 @@ from collections import defaultdict
 from urllib.error import HTTPError, URLError
 from urllib.parse import urlparse, urlsplit, urlunparse, urlunsplit
 from urllib.request import Request, urlopen
+from warnings import warn
 
 import requests
 from django.core.exceptions import ImproperlyConfigured
+from django.http.request import validate_host
 
 from wagtail import __version__
+from wagtail.utils.deprecation import RemovedInWagtail70Warning
 
 logger = logging.getLogger("wagtail.frontendcache")
 
@@ -19,6 +22,10 @@ class PurgeRequest(Request):
 
 
 class BaseBackend:
+    def __init__(self, params):
+        # If unspecified, invalidate all hosts
+        self.hostnames = params.get("HOSTNAMES", ["*"])
+
     def purge(self, url):
         raise NotImplementedError
 
@@ -27,9 +34,16 @@ class BaseBackend:
         for url in urls:
             self.purge(url)
 
+    def invalidates_hostname(self, hostname):
+        """
+        Can `hostname` be invalidated by this backend?
+        """
+        return validate_host(hostname, self.hostnames)
+
 
 class HTTPBackend(BaseBackend):
     def __init__(self, params):
+        super().__init__(params)
         location_url_parsed = urlparse(params.pop("LOCATION"))
         self.cache_scheme = location_url_parsed.scheme
         self.cache_netloc = location_url_parsed.netloc
@@ -78,6 +92,8 @@ class CloudflareBackend(BaseBackend):
     CHUNK_SIZE = 30
 
     def __init__(self, params):
+        super().__init__(params)
+
         self.cloudflare_email = params.pop("EMAIL", None)
         self.cloudflare_api_key = params.pop("TOKEN", None) or params.pop(
             "API_KEY", None
@@ -174,6 +190,8 @@ class CloudfrontBackend(BaseBackend):
     def __init__(self, params):
         import boto3
 
+        super().__init__(params)
+
         self.client = boto3.client("cloudfront")
         try:
             self.cloudfront_distribution_id = params.pop("DISTRIBUTION_ID")
@@ -182,6 +200,14 @@ class CloudfrontBackend(BaseBackend):
                 "The setting 'WAGTAILFRONTENDCACHE' requires the object 'DISTRIBUTION_ID'."
             )
 
+        # Add known hostnames for hostname validation (if not already defined)
+        # RemovedInWagtail70Warning
+        if isinstance(self.cloudfront_distribution_id, dict):
+            if "HOSTNAMES" in params:
+                self.hostnames.extend(self.cloudfront_distribution_id.keys())
+            else:
+                self.hostnames = list(self.cloudfront_distribution_id.keys())
+
     def purge_batch(self, urls):
         paths_by_distribution_id = defaultdict(list)
 
@@ -190,11 +216,15 @@ class CloudfrontBackend(BaseBackend):
             distribution_id = None
 
             if isinstance(self.cloudfront_distribution_id, dict):
+                warn(
+                    "Using a `DISTRIBUTION_ID` mapping is deprecated - use `HOSTNAMES` in combination with multiple backends instead.",
+                    category=RemovedInWagtail70Warning,
+                )
                 host = url_parsed.hostname
                 if host in self.cloudfront_distribution_id:
                     distribution_id = self.cloudfront_distribution_id.get(host)
                 else:
-                    logger.info(
+                    logger.warning(
                         "Couldn't purge '%s' from CloudFront. Hostname '%s' not found in the DISTRIBUTION_ID mapping",
                         url,
                         host,
@@ -235,6 +265,7 @@ class CloudfrontBackend(BaseBackend):
 
 class AzureBaseBackend(BaseBackend):
     def __init__(self, params):
+        super().__init__(params)
         self._credentials = params.pop("CREDENTIALS", None)
         self._subscription_id = params.pop("SUBSCRIPTION_ID", None)
         try:

+ 44 - 10
wagtail/contrib/frontend_cache/tests.py

@@ -19,6 +19,7 @@ from wagtail.contrib.frontend_cache.backends import (
 from wagtail.contrib.frontend_cache.utils import get_backends
 from wagtail.models import Page
 from wagtail.test.testapp.models import EventIndex
+from wagtail.utils.deprecation import RemovedInWagtail70Warning
 
 from .utils import (
     PurgeBatch,
@@ -336,15 +337,31 @@ class TestBackendConfiguration(TestCase):
                 },
             }
         )
-        backends.get("cloudfront").purge(
-            "http://www.wagtail.org/home/events/christmas/"
-        )
-        backends.get("cloudfront").purge("http://torchbox.com/blog/")
+        with self.assertWarnsMessage(
+            RemovedInWagtail70Warning,
+            "Using a `DISTRIBUTION_ID` mapping is deprecated - use `HOSTNAMES` in combination with multiple backends instead.",
+        ):
+            backends.get("cloudfront").purge(
+                "http://www.wagtail.org/home/events/christmas/"
+            )
+
+        with self.assertWarnsMessage(
+            RemovedInWagtail70Warning,
+            "Using a `DISTRIBUTION_ID` mapping is deprecated - use `HOSTNAMES` in combination with multiple backends instead.",
+        ):
+            backends.get("cloudfront").purge("http://torchbox.com/blog/")
 
         _create_invalidation.assert_called_once_with(
             "frontend", ["/home/events/christmas/"]
         )
 
+        self.assertTrue(
+            backends.get("cloudfront").invalidates_hostname("www.wagtail.org")
+        )
+        self.assertFalse(
+            backends.get("cloudfront").invalidates_hostname("torchbox.com")
+        )
+
     def test_multiple(self):
         backends = get_backends(
             backend_settings={
@@ -396,17 +413,11 @@ PURGED_URLS = []
 
 
 class MockBackend(BaseBackend):
-    def __init__(self, config):
-        pass
-
     def purge(self, url):
         PURGED_URLS.append(url)
 
 
 class MockCloudflareBackend(CloudflareBackend):
-    def __init__(self, config):
-        pass
-
     def _purge_urls(self, urls):
         if len(urls) > self.CHUNK_SIZE:
             raise Exception("Cloudflare backend is not chunking requests as expected")
@@ -465,11 +476,34 @@ class TestCachePurgingFunctions(TestCase):
             ],
         )
 
+    @override_settings(
+        WAGTAILFRONTENDCACHE={
+            "varnish": {
+                "BACKEND": "wagtail.contrib.frontend_cache.tests.MockBackend",
+                "HOSTNAMES": ["example.com"],
+            },
+        }
+    )
+    def test_invalidate_specific_location(self):
+        with self.assertLogs(level="WARNING") as log_output:
+            purge_url_from_cache("http://localhost/foo")
+
+        self.assertEqual(PURGED_URLS, [])
+        self.assertIn(
+            "Unable to find purge backend for localhost",
+            log_output.output[0],
+        )
+
+        purge_url_from_cache("http://example.com/foo")
+        self.assertEqual(PURGED_URLS, ["http://example.com/foo"])
+
 
 @override_settings(
     WAGTAILFRONTENDCACHE={
         "cloudflare": {
             "BACKEND": "wagtail.contrib.frontend_cache.tests.MockCloudflareBackend",
+            "ZONEID": "zone",
+            "BEARER_TOKEN": "token",
         },
     }
 )

+ 23 - 4
wagtail/contrib/frontend_cache/utils.py

@@ -1,5 +1,6 @@
 import logging
 import re
+from collections import defaultdict
 from urllib.parse import urlparse, urlunparse
 
 from django.conf import settings
@@ -100,11 +101,29 @@ def purge_urls_from_cache(urls, backend_settings=None, backends=None):
 
         urls = new_urls
 
-    for backend_name, backend in get_backends(backend_settings, backends).items():
-        for url in urls:
-            logger.info("[%s] Purging URL: %s", backend_name, url)
+    urls_by_hostname = defaultdict(list)
 
-        backend.purge_batch(urls)
+    for url in urls:
+        urls_by_hostname[urlparse(url).netloc].append(url)
+
+    backends = get_backends(backend_settings, backends)
+
+    for hostname, urls in urls_by_hostname.items():
+        backends_for_hostname = {
+            backend_name: backend
+            for backend_name, backend in backends.items()
+            if backend.invalidates_hostname(hostname)
+        }
+
+        if not backends_for_hostname:
+            logger.warning("Unable to find purge backend for %s", hostname)
+            continue
+
+        for backend_name, backend in backends_for_hostname.items():
+            for url in urls:
+                logger.info("[%s] Purging URL: %s", backend_name, url)
+
+            backend.purge_batch(urls)
 
 
 def _get_page_cached_urls(page):