importexport.py 5.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146
  1. """
  2. This code is largely copied from, or extends, the now-defunct
  3. ``wagtailimportexport`` package.
  4. In the future we may want to build a more robust import/exporter for CSV files,
  5. or simply deprecate all of this functionality.
  6. See: https://github.com/torchbox/wagtail-import-export/
  7. """
  import copy
  import csv

  from django import forms
  from django.apps import apps
  from django.contrib.contenttypes.models import ContentType
  from django.db import models, transaction
  from django.utils.translation import gettext as _
  from django.utils.translation import gettext_lazy
  from modelcluster.models import get_all_child_relations
  from wagtail.admin.widgets import AdminPageChooser
  from wagtail.models import Page

  from wagtailcrx.forms import get_page_model_choices
  class ImportPagesFromCSVFileForm(forms.Form):
      """
      Form for uploading a CSV file of pages and choosing where to import them.

      Mostly copied from:
      https://github.com/torchbox/wagtail-import-export/blob/master/wagtailimportexport/forms.py#L29
      with addition of ``page_type``.

      Labels and help text use ``gettext_lazy`` because class attributes are
      evaluated once, at import time; an eager ``gettext`` call here would
      freeze the strings in whatever language was active when the module was
      first loaded instead of translating them per request.
      """

      # Page model to create from the CSV rows; the available choices are
      # supplied by the ``get_page_model_choices`` callable.
      page_type = forms.ChoiceField(choices=get_page_model_choices)

      # The uploaded file to import.
      file = forms.FileField(label=gettext_lazy("File to import"))

      # Any existing page (the root included) may be picked as the destination.
      parent_page = forms.ModelChoiceField(
          queryset=Page.objects.all(),
          widget=AdminPageChooser(can_choose_root=True, show_edit_link=False),
          label=gettext_lazy("Destination parent page"),
          help_text=gettext_lazy(
              "Imported pages will be created as children of this page."
          ),
      )
  def update_page_references(model, pages_by_original_id):
      """
      Re-point page foreign keys on ``model`` at the pages created by the import.

      Copied from:
      https://github.com/torchbox/wagtail-import-export/blob/master/wagtailimportexport/importing.py#L67

      ``pages_by_original_id`` maps a page ID found in the imported data to the
      page instance that now stands in for it. ``model`` is modified in place,
      as are its inline child objects (recursively); nothing is returned and
      nothing is saved here.
      """
      for field in model._meta.get_fields():
          # Only foreign keys that target Page (or a Page subclass) matter.
          if not isinstance(field, models.ForeignKey):
              continue
          if not issubclass(field.related_model, Page):
              continue

          referenced_id = getattr(model, field.attname)
          if referenced_id not in pages_by_original_id:
              # The link points at a page outside of the import: keep it as is.
              continue

          # Swap the stored ID for the ID of the corresponding imported page.
          setattr(model, field.attname, pages_by_original_id[referenced_id].id)

      # Inline child models need the same treatment, including the ParentalKey
      # that points back to the page.
      for relation in get_all_child_relations(model):
          accessor_name = relation.get_accessor_name()
          for child in getattr(model, accessor_name).all():
              # Clearing the PK makes the child get inserted as a new record
              # rather than updating an existing one.
              child.pk = None
              update_page_references(child, pages_by_original_id)
  @transaction.atomic()
  def import_pages(import_data, parent_page):
      """
      Create new pages beneath ``parent_page`` from CSV-derived import data.

      Variant of the wagtailimportexport ``import_pages`` function that copes
      with generic CSVs. The stock version assumes every page record already
      carries the pk it had in an exported JSON file; here the records describe
      brand new pages, so the pk assigned on creation is written back into each
      record.

      ``import_data['pages']`` is a list of records with ``'app_label'``,
      ``'model'`` and ``'content'`` keys. Each record's ``content['pk']`` is
      overwritten in place. Returns the number of records in
      ``import_data['pages']``. ``apps.get_model`` raises ``LookupError`` when a
      record names a model that does not exist on this site.
      """
      records = import_data['pages']
      imported_pages = {}

      # Pass 1: create the plain Page rows. These contain no foreign keys, so
      # the full ID -> page mapping exists before the specific page models
      # (whose foreign keys / rich text / streamfields may reference page IDs)
      # are handled.
      base_content_type = ContentType.objects.get_for_model(Page)
      for record in records:
          content = record['content']

          # Start from a base Page so the title and other core attributes in
          # the record are picked up.
          base_page = Page.from_serializable_data(content)

          # Blank out the id and the treebeard bookkeeping so that add_child
          # assigns fresh values on save.
          for attr_name in ('id', 'path', 'depth', 'url_path'):
              setattr(base_page, attr_name, None)
          base_page.numchild = 0
          base_page.content_type = base_content_type
          parent_page.add_child(instance=base_page)

          # Custom code: feed the new pk back into the record. The mapping is
          # therefore keyed by the *new* id, which is exactly what pass 2 reads
          # back out of the record's content.
          content['pk'] = base_page.pk
          imported_pages[base_page.id] = base_page

      # Pass 2: build and save the specific page for every record.
      for record in records:
          # Resolve the model by app label and model name; content type IDs are
          # not in general the same between source and destination sites, but
          # the page model has to exist on both.
          page_model = apps.get_model(record['app_label'], record['model'])
          specific_page = page_model.from_serializable_data(
              record['content'],
              check_fks=False,
              strict_fks=False,
          )

          created_page = imported_pages[specific_page.id]
          specific_page.base_page_ptr = created_page
          # Carry over the state of the saved base page (id, tree position, ...).
          specific_page.__dict__.update(created_page.__dict__)
          specific_page.content_type = ContentType.objects.get_for_model(page_model)
          update_page_references(specific_page, imported_pages)
          specific_page.save()

      return len(records)
  def convert_csv_to_json(csv_file, page_type):
      """
      Turn CSV rows into the ``{"pages": [...]}`` structure used for importing.

      ``csv_file`` is anything ``csv.DictReader`` can iterate (the first row is
      the header). ``page_type`` is an ``"app_label:ModelName"`` string; a value
      that does not split into exactly two parts on ``":"`` raises
      ``ValueError``.

      Each row becomes one record with ``app_label``, ``content`` and ``model``
      keys. ``content`` starts with ``"pk": None`` and is then filled with the
      row's columns, so a ``pk`` column in the CSV replaces the ``None``.
      """
      app_label, klass = page_type.split(":")
      return {
          "pages": [
              {
                  "app_label": app_label,
                  "content": {"pk": None, **row},
                  "model": klass,
              }
              for row in csv.DictReader(csv_file)
          ]
      }