20 KB

import json
import os
import posixpath
import re
from hashlib import md5
from urllib.parse import unquote, urldefrag, urlsplit, urlunsplit

from django.conf import STATICFILES_STORAGE_ALIAS, settings
from django.contrib.staticfiles.utils import check_settings, matches_patterns
from django.core.exceptions import ImproperlyConfigured
from django.core.files.base import ContentFile
from django.core.files.storage import FileSystemStorage, storages
from django.utils.functional import LazyObject
  13. class StaticFilesStorage(FileSystemStorage):
  14. """
  15. Standard file system storage for static files.
  16. The defaults for ``location`` and ``base_url`` are
  17. ``STATIC_ROOT`` and ``STATIC_URL``.
  18. """
  19. def __init__(self, location=None, base_url=None, *args, **kwargs):
  20. if location is None:
  21. location = settings.STATIC_ROOT
  22. if base_url is None:
  23. base_url = settings.STATIC_URL
  24. check_settings(base_url)
  25. super().__init__(location, base_url, *args, **kwargs)
  26. # FileSystemStorage fallbacks to MEDIA_ROOT when location
  27. # is empty, so we restore the empty value.
  28. if not location:
  29. self.base_location = None
  30. self.location = None
  31. def path(self, name):
  32. if not self.location:
  33. raise ImproperlyConfigured(
  34. "You're using the staticfiles app "
  35. "without having set the STATIC_ROOT "
  36. "setting to a filesystem path."
  37. )
  38. return super().path(name)
  39. class HashedFilesMixin:
  40. default_template = """url("%(url)s")"""
  41. max_post_process_passes = 5
  42. patterns = (
  43. (
  44. "*.css",
  45. (
  46. r"""(?P<matched>url\(['"]{0,1}\s*(?P<url>.*?)["']{0,1}\))""",
  47. (
  48. r"""(?P<matched>@import\s*["']\s*(?P<url>.*?)["'])""",
  49. """@import url("%(url)s")""",
  50. ),
  51. (
  52. (
  53. r"(?m)(?P<matched>)^(/\*#[ \t]"
  54. r"(?-i:sourceMappingURL)=(?P<url>.*)[ \t]*\*/)$"
  55. ),
  56. "/*# sourceMappingURL=%(url)s */",
  57. ),
  58. ),
  59. ),
  60. (
  61. "*.js",
  62. (
  63. (
  64. r"(?m)(?P<matched>)^(//# (?-i:sourceMappingURL)=(?P<url>.*))$",
  65. "//# sourceMappingURL=%(url)s",
  66. ),
  67. (
  68. (
  69. r"""(?P<matched>import(?s:(?P<import>[\s\{].*?))"""
  70. r"""\s*from\s*['"](?P<url>[\.\/].*?)["']\s*;)"""
  71. ),
  72. """import%(import)s from "%(url)s";""",
  73. ),
  74. (
  75. (
  76. r"""(?P<matched>export(?s:(?P<exports>[\s\{].*?))"""
  77. r"""\s*from\s*["'](?P<url>[\.\/].*?)["']\s*;)"""
  78. ),
  79. """export%(exports)s from "%(url)s";""",
  80. ),
  81. (
  82. r"""(?P<matched>import\s*['"](?P<url>[\.\/].*?)["']\s*;)""",
  83. """import"%(url)s";""",
  84. ),
  85. (
  86. r"""(?P<matched>import\(["'](?P<url>.*?)["']\))""",
  87. """import("%(url)s")""",
  88. ),
  89. ),
  90. ),
  91. )
  92. keep_intermediate_files = True
  93. def __init__(self, *args, **kwargs):
  94. super().__init__(*args, **kwargs)
  95. self._patterns = {}
  96. self.hashed_files = {}
  97. for extension, patterns in self.patterns:
  98. for pattern in patterns:
  99. if isinstance(pattern, (tuple, list)):
  100. pattern, template = pattern
  101. else:
  102. template = self.default_template
  103. compiled = re.compile(pattern, re.IGNORECASE)
  104. self._patterns.setdefault(extension, []).append((compiled, template))
  105. def file_hash(self, name, content=None):
  106. """
  107. Return a hash of the file with the given name and optional content.
  108. """
  109. if content is None:
  110. return None
  111. hasher = md5(usedforsecurity=False)
  112. for chunk in content.chunks():
  113. hasher.update(chunk)
  114. return hasher.hexdigest()[:12]
  115. def hashed_name(self, name, content=None, filename=None):
  116. # `filename` is the name of file to hash if `content` isn't given.
  117. # `name` is the base name to construct the new hashed filename from.
  118. parsed_name = urlsplit(unquote(name))
  119. clean_name = parsed_name.path.strip()
  120. filename = (filename and urlsplit(unquote(filename)).path.strip()) or clean_name
  121. opened = content is None
  122. if opened:
  123. if not self.exists(filename):
  124. raise ValueError(
  125. "The file '%s' could not be found with %r." % (filename, self)
  126. )
  127. try:
  128. content =
  129. except OSError:
  130. # Handle directory paths and fragments
  131. return name
  132. try:
  133. file_hash = self.file_hash(clean_name, content)
  134. finally:
  135. if opened:
  136. content.close()
  137. path, filename = os.path.split(clean_name)
  138. root, ext = os.path.splitext(filename)
  139. file_hash = (".%s" % file_hash) if file_hash else ""
  140. hashed_name = os.path.join(path, "%s%s%s" % (root, file_hash, ext))
  141. unparsed_name = list(parsed_name)
  142. unparsed_name[2] = hashed_name
  143. # Special casing for a @font-face hack, like url(myfont.eot?#iefix")
  144. #
  145. if "?#" in name and not unparsed_name[3]:
  146. unparsed_name[2] += "?"
  147. return urlunsplit(unparsed_name)
  148. def _url(self, hashed_name_func, name, force=False, hashed_files=None):
  149. """
  150. Return the non-hashed URL in DEBUG mode.
  151. """
  152. if settings.DEBUG and not force:
  153. hashed_name, fragment = name, ""
  154. else:
  155. clean_name, fragment = urldefrag(name)
  156. if urlsplit(clean_name).path.endswith("/"): # don't hash paths
  157. hashed_name = name
  158. else:
  159. args = (clean_name,)
  160. if hashed_files is not None:
  161. args += (hashed_files,)
  162. hashed_name = hashed_name_func(*args)
  163. final_url = super().url(hashed_name)
  164. # Special casing for a @font-face hack, like url(myfont.eot?#iefix")
  165. #
  166. query_fragment = "?#" in name # [sic!]
  167. if fragment or query_fragment:
  168. urlparts = list(urlsplit(final_url))
  169. if fragment and not urlparts[4]:
  170. urlparts[4] = fragment
  171. if query_fragment and not urlparts[3]:
  172. urlparts[2] += "?"
  173. final_url = urlunsplit(urlparts)
  174. return unquote(final_url)
  175. def url(self, name, force=False):
  176. """
  177. Return the non-hashed URL in DEBUG mode.
  178. """
  179. return self._url(self.stored_name, name, force)
  180. def url_converter(self, name, hashed_files, template=None):
  181. """
  182. Return the custom URL converter for the given file name.
  183. """
  184. if template is None:
  185. template = self.default_template
  186. def converter(matchobj):
  187. """
  188. Convert the matched URL to a normalized and hashed URL.
  189. This requires figuring out which files the matched URL resolves
  190. to and calling the url() method of the storage.
  191. """
  192. matches = matchobj.groupdict()
  193. matched = matches["matched"]
  194. url = matches["url"]
  195. # Ignore absolute/protocol-relative and data-uri URLs.
  196. if re.match(r"^[a-z]+:", url):
  197. return matched
  198. # Ignore absolute URLs that don't point to a static file (dynamic
  199. # CSS / JS?). Note that STATIC_URL cannot be empty.
  200. if url.startswith("/") and not url.startswith(settings.STATIC_URL):
  201. return matched
  202. # Strip off the fragment so a path-like fragment won't interfere.
  203. url_path, fragment = urldefrag(url)
  204. # Ignore URLs without a path
  205. if not url_path:
  206. return matched
  207. if url_path.startswith("/"):
  208. # Otherwise the condition above would have returned prematurely.
  209. assert url_path.startswith(settings.STATIC_URL)
  210. target_name = url_path[len(settings.STATIC_URL) :]
  211. else:
  212. # We're using the posixpath module to mix paths and URLs conveniently.
  213. source_name = name if os.sep == "/" else name.replace(os.sep, "/")
  214. target_name = posixpath.join(posixpath.dirname(source_name), url_path)
  215. # Determine the hashed name of the target file with the storage backend.
  216. hashed_url = self._url(
  217. self._stored_name,
  218. unquote(target_name),
  219. force=True,
  220. hashed_files=hashed_files,
  221. )
  222. transformed_url = "/".join(
  223. url_path.split("/")[:-1] + hashed_url.split("/")[-1:]
  224. )
  225. # Restore the fragment that was stripped off earlier.
  226. if fragment:
  227. transformed_url += ("?#" if "?#" in url else "#") + fragment
  228. # Return the hashed version to the file
  229. matches["url"] = unquote(transformed_url)
  230. return template % matches
  231. return converter
  232. def post_process(self, paths, dry_run=False, **options):
  233. """
  234. Post process the given dictionary of files (called from collectstatic).
  235. Processing is actually two separate operations:
  236. 1. renaming files to include a hash of their content for cache-busting,
  237. and copying those files to the target storage.
  238. 2. adjusting files which contain references to other files so they
  239. refer to the cache-busting filenames.
  240. If either of these are performed on a file, then that file is considered
  241. post-processed.
  242. """
  243. # don't even dare to process the files if we're in dry run mode
  244. if dry_run:
  245. return
  246. # where to store the new paths
  247. hashed_files = {}
  248. # build a list of adjustable files
  249. adjustable_paths = [
  250. path for path in paths if matches_patterns(path, self._patterns)
  251. ]
  252. # Adjustable files to yield at end, keyed by the original path.
  253. processed_adjustable_paths = {}
  254. # Do a single pass first. Post-process all files once, yielding not
  255. # adjustable files and exceptions, and collecting adjustable files.
  256. for name, hashed_name, processed, _ in self._post_process(
  257. paths, adjustable_paths, hashed_files
  258. ):
  259. if name not in adjustable_paths or isinstance(processed, Exception):
  260. yield name, hashed_name, processed
  261. else:
  262. processed_adjustable_paths[name] = (name, hashed_name, processed)
  263. paths = {path: paths[path] for path in adjustable_paths}
  264. substitutions = False
  265. for i in range(self.max_post_process_passes):
  266. substitutions = False
  267. for name, hashed_name, processed, subst in self._post_process(
  268. paths, adjustable_paths, hashed_files
  269. ):
  270. # Overwrite since hashed_name may be newer.
  271. processed_adjustable_paths[name] = (name, hashed_name, processed)
  272. substitutions = substitutions or subst
  273. if not substitutions:
  274. break
  275. if substitutions:
  276. yield "All", None, RuntimeError("Max post-process passes exceeded.")
  277. # Store the processed paths
  278. self.hashed_files.update(hashed_files)
  279. # Yield adjustable files with final, hashed name.
  280. yield from processed_adjustable_paths.values()
  281. def _post_process(self, paths, adjustable_paths, hashed_files):
  282. # Sort the files by directory level
  283. def path_level(name):
  284. return len(name.split(os.sep))
  285. for name in sorted(paths, key=path_level, reverse=True):
  286. substitutions = True
  287. # use the original, local file, not the copied-but-unprocessed
  288. # file, which might be somewhere far away, like S3
  289. storage, path = paths[name]
  290. with as original_file:
  291. cleaned_name = self.clean_name(name)
  292. hash_key = self.hash_key(cleaned_name)
  293. # generate the hash with the original content, even for
  294. # adjustable files.
  295. if hash_key not in hashed_files:
  296. hashed_name = self.hashed_name(name, original_file)
  297. else:
  298. hashed_name = hashed_files[hash_key]
  299. # then get the original's file content..
  300. if hasattr(original_file, "seek"):
  302. hashed_file_exists = self.exists(hashed_name)
  303. processed = False
  304. # apply each replacement pattern to the content
  305. if name in adjustable_paths:
  306. old_hashed_name = hashed_name
  307. content ="utf-8")
  308. for extension, patterns in self._patterns.items():
  309. if matches_patterns(path, (extension,)):
  310. for pattern, template in patterns:
  311. converter = self.url_converter(
  312. name, hashed_files, template
  313. )
  314. try:
  315. content = pattern.sub(converter, content)
  316. except ValueError as exc:
  317. yield name, None, exc, False
  318. if hashed_file_exists:
  319. self.delete(hashed_name)
  320. # then save the processed result
  321. content_file = ContentFile(content.encode())
  322. if self.keep_intermediate_files:
  323. # Save intermediate file for reference
  324. self._save(hashed_name, content_file)
  325. hashed_name = self.hashed_name(name, content_file)
  326. if self.exists(hashed_name):
  327. self.delete(hashed_name)
  328. saved_name = self._save(hashed_name, content_file)
  329. hashed_name = self.clean_name(saved_name)
  330. # If the file hash stayed the same, this file didn't change
  331. if old_hashed_name == hashed_name:
  332. substitutions = False
  333. processed = True
  334. if not processed:
  335. # or handle the case in which neither processing nor
  336. # a change to the original file happened
  337. if not hashed_file_exists:
  338. processed = True
  339. saved_name = self._save(hashed_name, original_file)
  340. hashed_name = self.clean_name(saved_name)
  341. # and then set the cache accordingly
  342. hashed_files[hash_key] = hashed_name
  343. yield name, hashed_name, processed, substitutions
  344. def clean_name(self, name):
  345. return name.replace("\\", "/")
  346. def hash_key(self, name):
  347. return name
  348. def _stored_name(self, name, hashed_files):
  349. # Normalize the path to avoid multiple names for the same file like
  350. # ../foo/bar.css and ../foo/../foo/bar.css which normalize to the same
  351. # path.
  352. name = posixpath.normpath(name)
  353. cleaned_name = self.clean_name(name)
  354. hash_key = self.hash_key(cleaned_name)
  355. cache_name = hashed_files.get(hash_key)
  356. if cache_name is None:
  357. cache_name = self.clean_name(self.hashed_name(name))
  358. return cache_name
  359. def stored_name(self, name):
  360. cleaned_name = self.clean_name(name)
  361. hash_key = self.hash_key(cleaned_name)
  362. cache_name = self.hashed_files.get(hash_key)
  363. if cache_name:
  364. return cache_name
  365. # No cached name found, recalculate it from the files.
  366. intermediate_name = name
  367. for i in range(self.max_post_process_passes + 1):
  368. cache_name = self.clean_name(
  369. self.hashed_name(name, content=None, filename=intermediate_name)
  370. )
  371. if intermediate_name == cache_name:
  372. # Store the hashed name if there was a miss.
  373. self.hashed_files[hash_key] = cache_name
  374. return cache_name
  375. else:
  376. # Move on to the next intermediate file.
  377. intermediate_name = cache_name
  378. # If the cache name can't be determined after the max number of passes,
  379. # the intermediate files on disk may be corrupt; avoid an infinite loop.
  380. raise ValueError("The name '%s' could not be hashed with %r." % (name, self))
  381. class ManifestFilesMixin(HashedFilesMixin):
  382. manifest_version = "1.1" # the manifest format standard
  383. manifest_name = "staticfiles.json"
  384. manifest_strict = True
  385. keep_intermediate_files = False
  386. def __init__(self, *args, manifest_storage=None, **kwargs):
  387. super().__init__(*args, **kwargs)
  388. if manifest_storage is None:
  389. manifest_storage = self
  390. self.manifest_storage = manifest_storage
  391. self.hashed_files, self.manifest_hash = self.load_manifest()
  392. def read_manifest(self):
  393. try:
  394. with as manifest:
  395. return
  396. except FileNotFoundError:
  397. return None
  398. def load_manifest(self):
  399. content = self.read_manifest()
  400. if content is None:
  401. return {}, ""
  402. try:
  403. stored = json.loads(content)
  404. except json.JSONDecodeError:
  405. pass
  406. else:
  407. version = stored.get("version")
  408. if version in ("1.0", "1.1"):
  409. return stored.get("paths", {}), stored.get("hash", "")
  410. raise ValueError(
  411. "Couldn't load manifest '%s' (version %s)"
  412. % (self.manifest_name, self.manifest_version)
  413. )
  414. def post_process(self, *args, **kwargs):
  415. self.hashed_files = {}
  416. yield from super().post_process(*args, **kwargs)
  417. if not kwargs.get("dry_run"):
  418. self.save_manifest()
  419. def save_manifest(self):
  420. self.manifest_hash = self.file_hash(
  421. None, ContentFile(json.dumps(sorted(self.hashed_files.items())).encode())
  422. )
  423. payload = {
  424. "paths": self.hashed_files,
  425. "version": self.manifest_version,
  426. "hash": self.manifest_hash,
  427. }
  428. if self.manifest_storage.exists(self.manifest_name):
  429. self.manifest_storage.delete(self.manifest_name)
  430. contents = json.dumps(payload).encode()
  431. self.manifest_storage._save(self.manifest_name, ContentFile(contents))
  432. def stored_name(self, name):
  433. parsed_name = urlsplit(unquote(name))
  434. clean_name = parsed_name.path.strip()
  435. hash_key = self.hash_key(clean_name)
  436. cache_name = self.hashed_files.get(hash_key)
  437. if cache_name is None:
  438. if self.manifest_strict:
  439. raise ValueError(
  440. "Missing staticfiles manifest entry for '%s'" % clean_name
  441. )
  442. cache_name = self.clean_name(self.hashed_name(name))
  443. unparsed_name = list(parsed_name)
  444. unparsed_name[2] = cache_name
  445. # Special casing for a @font-face hack, like url(myfont.eot?#iefix")
  446. #
  447. if "?#" in name and not unparsed_name[3]:
  448. unparsed_name[2] += "?"
  449. return urlunsplit(unparsed_name)
  450. class ManifestStaticFilesStorage(ManifestFilesMixin, StaticFilesStorage):
  451. """
  452. A static file system storage backend which also saves
  453. hashed copies of the files it saves.
  454. """
  455. pass
  456. class ConfiguredStorage(LazyObject):
  457. def _setup(self):
  458. self._wrapped = storages[STATICFILES_STORAGE_ALIAS]
  459. staticfiles_storage = ConfiguredStorage()