|
@@ -142,15 +142,17 @@ class BrokenLinkEmailsMiddleware(object):
|
|
|
domain = request.get_host()
|
|
|
path = request.get_full_path()
|
|
|
referer = force_text(request.META.get('HTTP_REFERER', ''), errors='replace')
|
|
|
- is_internal = self.is_internal_request(domain, referer)
|
|
|
- is_not_search_engine = '?' not in referer
|
|
|
- is_ignorable = self.is_ignorable_404(path)
|
|
|
- if referer and (is_internal or is_not_search_engine) and not is_ignorable:
|
|
|
+
|
|
|
+ if not self.is_ignorable_request(request, path, domain, referer):
|
|
|
ua = request.META.get('HTTP_USER_AGENT', '<none>')
|
|
|
ip = request.META.get('REMOTE_ADDR', '<none>')
|
|
|
mail_managers(
|
|
|
- "Broken %slink on %s" % (('INTERNAL ' if is_internal else ''), domain),
|
|
|
- "Referrer: %s\nRequested URL: %s\nUser agent: %s\nIP address: %s\n" % (referer, path, ua, ip),
|
|
|
+ "Broken %slink on %s" % (
|
|
|
+ ('INTERNAL ' if self.is_internal_request(domain, referer) else ''),
|
|
|
+ domain
|
|
|
+ ),
|
|
|
+ "Referrer: %s\nRequested URL: %s\nUser agent: %s\n"
|
|
|
+ "IP address: %s\n" % (referer, path, ua, ip),
|
|
|
fail_silently=True)
|
|
|
return response
|
|
|
|
|
@@ -159,10 +161,14 @@ class BrokenLinkEmailsMiddleware(object):
|
|
|
Returns True if the referring URL is the same domain as the current request.
|
|
|
"""
|
|
|
# Different subdomains are treated as different domains.
|
|
|
- return re.match("^https?://%s/" % re.escape(domain), referer)
|
|
|
+ return bool(re.match("^https?://%s/" % re.escape(domain), referer))
|
|
|
|
|
|
- def is_ignorable_404(self, uri):
|
|
|
+ def is_ignorable_request(self, request, uri, domain, referer):
|
|
|
"""
|
|
|
- Returns True if a 404 at the given URL *shouldn't* notify the site managers.
|
|
|
+ Returns True if the given request *shouldn't* notify the site managers.
|
|
|
"""
|
|
|
+ # '?' in referer is identified as search engine source
|
|
|
+ if (not referer or
|
|
|
+ (not self.is_internal_request(domain, referer) and '?' in referer)):
|
|
|
+ return True
|
|
|
return any(pattern.search(uri) for pattern in settings.IGNORABLE_404_URLS)
|