Browse Source

Update richtext to streamfield migration snippet

Perform the data migration while it's still a rich text field, to avoid the need for `use_json_field=False`. Also work around the unavailability of `page.revisions` on frozen ORM models (which has been absent since 4.0, since it's now a property on RevisionMixin rather than a true relation).
Matt Westcott 2 years ago
parent
commit
635db6d850
1 changed files with 80 additions and 52 deletions
  1. 80 52
      docs/advanced_topics/streamfield_migrations.md

+ 80 - 52
docs/advanced_topics/streamfield_migrations.md

@@ -8,33 +8,35 @@
 
 If you change an existing RichTextField to a StreamField, the database migration will complete with no errors, since both fields use a text column within the database. However, StreamField uses a JSON representation for its data, so the existing text requires an extra conversion step in order to become accessible again. For this to work, the StreamField needs to include a RichTextBlock as one of the available block types. Create the migration as normal using `./manage.py makemigrations`, then edit it as follows (in this example, the 'body' field of the `demo.BlogPage` model is being converted to a StreamField with a RichTextBlock named `rich_text`):
 
-```{note}
-This migration cannot be used if the StreamField has the `use_json_field` argument set to `True`. To migrate, set the `use_json_field` argument to `False` first, migrate the data, then set it back to `True`.
-```
-
 ```python
-# -*- coding: utf-8 -*-
-from django.db import models, migrations
-from wagtail.rich_text import RichText
+import json
+
+from django.core.serializers.json import DjangoJSONEncoder
+from django.db import migrations
+
+import wagtail.blocks
+import wagtail.fields
 
 
 def convert_to_streamfield(apps, schema_editor):
     BlogPage = apps.get_model("demo", "BlogPage")
     for page in BlogPage.objects.all():
-        if page.body.raw_text and not page.body:
-            page.body = [('rich_text', RichText(page.body.raw_text))]
-            page.save()
+        page.body = json.dumps(
+            [{"type": "rich_text", "value": page.body}],
+            cls=DjangoJSONEncoder
+        )
+        page.save()
 
 
 def convert_to_richtext(apps, schema_editor):
     BlogPage = apps.get_model("demo", "BlogPage")
     for page in BlogPage.objects.all():
-        if page.body.raw_text is None:
-            raw_text = ''.join([
-                child.value.source for child in page.body
-                if child.block_type == 'rich_text'
+        if page.body:
+            stream = json.loads(page.body)
+            page.body = "".join([
+                child["value"] for child in stream
+                if child["type"] == "rich_text"
             ])
-            page.body = raw_text
             page.save()
 
 
@@ -42,52 +44,64 @@ class Migration(migrations.Migration):
 
     dependencies = [
         # leave the dependency line from the generated migration intact!
-        ('demo', '0001_initial'),
+        ("demo", "0001_initial"),
     ]
 
     operations = [
-        # leave the generated AlterField intact!
-        migrations.AlterField(
-            model_name='BlogPage',
-            name='body',
-            field=wagtail.fields.StreamField([('rich_text', wagtail.blocks.RichTextBlock())]),
-        ),
-
         migrations.RunPython(
             convert_to_streamfield,
             convert_to_richtext,
         ),
+
+        # leave the generated AlterField intact!
+        migrations.AlterField(
+            model_name="BlogPage",
+            name="body",
+            field=wagtail.fields.StreamField(
+                [("rich_text", wagtail.blocks.RichTextBlock())],
+                use_json_field=True,
+            ),
+        ),
     ]
 ```
 
 Note that the above migration will work on published Page objects only. If you also need to migrate draft pages and page revisions, then edit the migration as in the following example instead:
 
 ```python
-# -*- coding: utf-8 -*-
 import json
 
+from django.contrib.contenttypes.models import ContentType
 from django.core.serializers.json import DjangoJSONEncoder
-from django.db import migrations, models
+from django.db import migrations
 
-from wagtail.rich_text import RichText
+import wagtail.blocks
+import wagtail.fields
 
 
 def page_to_streamfield(page):
     changed = False
-    if page.body.raw_text and not page.body:
-        page.body = [('rich_text', RichText(page.body.raw_text))]
+    try:
+        json.loads(page.body)
+    except ValueError:
+        page.body = json.dumps(
+            [{"type": "rich_text", "value": page.body}],
+        )
         changed = True
+    else:
+        # It's already valid JSON. Leave it.
+        pass
+
     return page, changed
 
 
 def pagerevision_to_streamfield(revision_data):
     changed = False
-    body = revision_data.get('body')
+    body = revision_data.get("body")
     if body:
         try:
             json.loads(body)
         except ValueError:
-            revision_data['body'] = json.dumps(
+            revision_data["body"] = json.dumps(
                 [{
                     "value": body,
                     "type": "rich_text"
@@ -101,20 +115,25 @@ def pagerevision_to_streamfield(revision_data):
 
 
 def page_to_richtext(page):
-    changed = False
-    if page.body.raw_text is None:
-        raw_text = ''.join([
-            child.value.source for child in page.body
-            if child.block_type == 'rich_text'
-        ])
-        page.body = raw_text
-        changed = True
+    if page.body:
+        try:
+            body_data = json.loads(page.body)
+        except ValueError:
+            # It's not apparently a StreamField. Leave it.
+            pass
+        else:
+            page.body = "".join([
+                child["value"] for child in body_data
+                if child["type"] == "rich_text"
+            ])
+            changed = True
+
     return page, changed
 
 
 def pagerevision_to_richtext(revision_data):
     changed = False
-    body = revision_data.get('body', 'definitely non-JSON string')
+    body = revision_data.get("body", "definitely non-JSON string")
     if body:
         try:
             body_data = json.loads(body)
@@ -122,24 +141,29 @@ def pagerevision_to_richtext(revision_data):
             # It's not apparently a StreamField. Leave it.
             pass
         else:
-            raw_text = ''.join([
-                child['value'] for child in body_data
-                if child['type'] == 'rich_text'
+            raw_text = "".join([
+                child["value"] for child in body_data
+                if child["type"] == "rich_text"
             ])
-            revision_data['body'] = raw_text
+            revision_data["body"] = raw_text
             changed = True
     return revision_data, changed
 
 
 def convert(apps, schema_editor, page_converter, pagerevision_converter):
     BlogPage = apps.get_model("demo", "BlogPage")
+    content_type = ContentType.objects.get_for_model(BlogPage)
+    Revision = apps.get_model("wagtailcore", "Revision")
+
     for page in BlogPage.objects.all():
 
         page, changed = page_converter(page)
         if changed:
             page.save()
 
-        for revision in page.revisions.all():
+        for revision in Revision.objects.filter(
+            content_type_id=content_type.pk, object_id=page.pk
+        ):
             revision_data = revision.content
             revision_data, changed = pagerevision_converter(revision_data)
             if changed:
@@ -159,21 +183,25 @@ class Migration(migrations.Migration):
 
     dependencies = [
         # leave the dependency line from the generated migration intact!
-        ('demo', '0001_initial'),
+        ("demo", "0001_initial"),
+        ("wagtailcore", "0076_modellogentry_revision"),
     ]
 
     operations = [
-        # leave the generated AlterField intact!
-        migrations.AlterField(
-            model_name='BlogPage',
-            name='body',
-            field=wagtail.fields.StreamField([('rich_text', wagtail.blocks.RichTextBlock())]),
-        ),
-
         migrations.RunPython(
             convert_to_streamfield,
             convert_to_richtext,
         ),
+
+        # leave the generated AlterField intact!
+        migrations.AlterField(
+            model_name="BlogPage",
+            name="body",
+            field=wagtail.fields.StreamField(
+                [("rich_text", wagtail.blocks.RichTextBlock())],
+                use_json_field=True,
+            ),
+        ),
     ]
 ```