alt-text.js 7.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218
  /**
   * Inline SVG markup (a "magic wand with sparkles" glyph) shown inside the
   * "Generate suggestions" button. Kept as a string so it can be interpolated
   * into HTML templates.
   */
  const icon = [
    '<svg width="16" height="16" class="Draftail-Icon" aria-hidden="true" viewBox="0 0 576 512" fill="currentColor">',
    '<path d="',
    // Top sparkle.
    'M234.7 42.7L197 56.8c-3 1.1-5 4-5 7.2s2 6.1 5 7.2l37.7 14.1L248.8 123c1.1 3 4 5 7.2 5s6.1-2 7.2-5l14.1-37.7L315 71.2c3-1.1 5-4 5-7.2s-2-6.1-5-7.2L277.3 42.7 263.2 5c-1.1-3-4-5-7.2-5s-6.1 2-7.2 5L234.7 42.7z',
    // Wand body.
    'M46.1 395.4c-18.7 18.7-18.7 49.1 0 67.9l34.6 34.6c18.7 18.7 49.1 18.7 67.9 0L529.9 116.5c18.7-18.7 18.7-49.1 0-67.9L495.3 14.1c-18.7-18.7-49.1-18.7-67.9 0L46.1 395.4z',
    // Wand tip cut-out.
    'M484.6 82.6l-105 105-23.3-23.3 105-105 23.3 23.3z',
    // Left sparkle.
    'M7.5 117.2C3 118.9 0 123.2 0 128s3 9.1 7.5 10.8L64 160l21.2 56.5c1.7 4.5 6 7.5 10.8 7.5s9.1-3 10.8-7.5L128 160l56.5-21.2c4.5-1.7 7.5-6 7.5-10.8s-3-9.1-7.5-10.8L128 96 106.8 39.5C105.1 35 100.8 32 96 32s-9.1 3-10.8 7.5L64 96 7.5 117.2z',
    // Bottom-right sparkle.
    'm352 256c-4.5 1.7-7.5 6-7.5 10.8s3 9.1 7.5 10.8L416 416l21.2 56.5c1.7 4.5 6 7.5 10.8 7.5s9.1-3 10.8-7.5L480 416l56.5-21.2c4.5-1.7 7.5-6 7.5-10.8s-3-9.1-7.5-10.8L480 352l-21.2-56.5c-1.7-4.5-6-7.5-10.8-7.5s-9.1 3-10.8 7.5L416 352l-56.5 21.2z',
    '"></path></svg>',
  ].join('');
  /**
   * Stimulus controller that generates caption / alt-text suggestions for the
   * image found inside its element, using in-browser machine-learning
   * pipelines from Transformers.js.
   *
   * Values:
   * - `imageInput`: CSS selector (relative to the controller element) of the
   *   input holding the chosen image; the suggest button is disabled while
   *   that input has no value.
   * - `captionInput`: CSS selector (relative to the controller element) of the
   *   input that receives a suggestion when "Use" is clicked.
   * - `contextual`: when true, the raw caption is rewritten by a text-to-text
   *   model using the surrounding form text as context.
   */
  class AltTextController extends window.StimulusModule.Controller {
    static targets = ['suggest'];

    static values = {
      imageInput: { default: '', type: String },
      captionInput: { default: '', type: String },
      contextual: { default: false, type: Boolean },
    };

    /** An image-to-text pipeline, shared between all instances of this controller. */
    static captioner;

    /** A text-to-text pipeline for enhancing captions, shared between all instances of this controller. */
    static text2text;

    /** Stylesheet for rendered suggestions; created and adopted at most once per page. */
    static #suggestionStyles = null;

    static {
      // Assign the pipeline promises synchronously so that `await`-ing them is
      // always safe, even when the user clicks before the library has loaded.
      const transformers = import(
        'https://cdn.jsdelivr.net/npm/@xenova/transformers@2.17.2'
      );
      this.captioner = transformers.then(({ pipeline }) =>
        pipeline('image-to-text', 'Mozilla/distilvit'),
      );
      this.text2text = transformers.then(({ pipeline }) =>
        pipeline('text2text-generation', 'Xenova/LaMini-Flan-T5-783M'),
      );
      // Report load failures once here instead of leaving unhandled rejections;
      // the rejection still resurfaces wherever the pipeline is awaited.
      this.captioner.catch((error) => {
        console.error('Error loading captioning pipeline:', error);
      });
      this.text2text.catch((error) => {
        console.error('Error loading text-to-text pipeline:', error);
      });
    }

    /**
     * Convert an array of input elements into a single string,
     * concatenating their values or inner text.
     * Blank entries are skipped; the rest are separated by an empty line.
     * @param {Array<HTMLInputElement | HTMLTextAreaElement | HTMLDivElement>} [inputs]
     * @returns {string} The concatenated text from the inputs
     */
    static inputsToText = (inputs = []) =>
      inputs
        .map((input) => input.value || input.innerText || '')
        .filter((text) => text.trim() !== '')
        .join('\n\n');

    /**
     * Make sure the shared suggestion stylesheet is adopted by the document.
     * Safe to call repeatedly; the sheet is only created and adopted once.
     */
    static #adoptSuggestionStyles() {
      if (AltTextController.#suggestionStyles) return;
      const css = new CSSStyleSheet();
      css.replaceSync(/* css */ `
        .suggestion {
          display: block;
          margin-top: 0.5rem;
          margin-bottom: 0.5rem;
          border-radius: 0.25rem;
          padding: 0.5rem;
          background-color: lightblue;
          color: black;
        }
      `);
      document.adoptedStyleSheets.push(css);
      AltTextController.#suggestionStyles = css;
    }

    /**
     * URL of the chosen image's preview, or an empty string if there is none.
     * @returns {string}
     */
    get imageURL() {
      return this.element.querySelector('img[data-chooser-image]')?.src || '';
    }

    // Override only for JSDoc/typing purposes, not for functionality
    /** @returns {HTMLElement} */
    get element() {
      return super.element;
    }

    /**
     * All text inputs in the caption input's form, excluding the caption
     * input itself. Empty when there is no caption input or it has no form.
     * @returns {Array<HTMLInputElement | HTMLTextAreaElement | HTMLDivElement>}
     */
    get textInputs() {
      const form = this.captionInput?.form;
      if (!form) return [];
      return [
        ...form.querySelectorAll(
          'input[type="text"], textarea, [role="textbox"]',
        ),
      ].filter((input) => input !== this.captionInput);
    }

    /**
     * Text inputs in the form, grouped by their position
     * relative to the caption input (before/after).
     * Both groups are always present, possibly empty.
     * @returns {{
     *   before: Array<HTMLInputElement | HTMLTextAreaElement | HTMLDivElement>,
     *   after: Array<HTMLInputElement | HTMLTextAreaElement | HTMLDivElement>
     * }}
     */
    get textInputsContext() {
      // Object.groupBy omits keys no element mapped to, so default both groups.
      const { before = [], after = [] } = Object.groupBy(
        this.textInputs,
        (element) =>
          this.captionInput.compareDocumentPosition(element) &
          Node.DOCUMENT_POSITION_PRECEDING
            ? 'before'
            : 'after',
      );
      return { before, after };
    }

    /**
     * Text content of the form before and after the caption input.
     * @returns {{ before: string, after: string }}
     */
    get textContext() {
      const { inputsToText } = AltTextController;
      const { before, after } = this.textInputsContext;
      return {
        before: inputsToText(before),
        after: inputsToText(after),
      };
    }

    /** Bind the generation methods and render the button and output area. */
    connect() {
      this.generate = this.generate.bind(this);
      this.caption = this.caption.bind(this);
      this.contextualCaption = this.contextualCaption.bind(this);
      this.renderFurniture();
    }

    /** Re-resolve the image input element when its selector value changes. */
    imageInputValueChanged() {
      if (this.imageInputValue) {
        this.imageInput = this.element.querySelector(this.imageInputValue);
      } else {
        this.imageInput = null;
      }
      if (this.hasSuggestTarget) this.toggleSuggestTarget();
    }

    /** Re-resolve the caption input element when its selector value changes. */
    captionInputValueChanged() {
      if (this.captionInputValue) {
        this.captionInput = this.element.querySelector(this.captionInputValue);
      } else {
        this.captionInput = null;
      }
    }

    /**
     * Enable the suggest button only while the image input has a value.
     * When used as an event handler, events from other elements are ignored.
     * @param {Event} [event]
     */
    toggleSuggestTarget(event) {
      if (event?.target && event.target !== this.imageInput) return;
      if (!this.hasSuggestTarget) return;
      this.suggestTarget.disabled = !this.imageInput?.value;
    }

    /** Render all UI elements managed by this controller. */
    renderFurniture() {
      this.renderSuggestButton();
      this.renderOutputArea();
      this.toggleSuggestTarget();
    }

    /** Append the "Generate suggestions" button unless one already exists. */
    renderSuggestButton() {
      if (this.hasSuggestTarget) return;
      // Fall back to the controller identifier when no ancestor carries an id.
      const prefix = this.element.closest('[id]')?.id || this.identifier;
      const template = document.createElement('template');
      // Only static markup is parsed here; the id is set as a property below
      // so that it never needs HTML escaping.
      template.innerHTML = /* html */ `
        <button
          type="button"
          data-${this.identifier}-target="suggest"
          data-action="${this.identifier}#generate"
          class="button button-secondary"
        >
          ${icon}
          <span>Generate suggestions</span>
        </button>
      `;
      const button = template.content.firstElementChild;
      button.id = `${prefix}-generate`;
      this.element.append(button);
    }

    /**
     * Ensure the suggestion styles are adopted and that an output area exists
     * inside the controller element. Reuses the existing area on reconnect.
     */
    renderOutputArea() {
      AltTextController.#adoptSuggestionStyles();
      if (this.outputArea?.parentElement === this.element) return;
      this.outputArea = document.createElement('div');
      this.element.append(this.outputArea);
    }

    /**
     * Append one suggestion, with a "Use" button, to the output area.
     * @param {string} suggestion Text generated by the model.
     */
    renderSuggestion(suggestion) {
      const wrapper = document.createElement('div');
      wrapper.className = 'suggestion';

      const output = document.createElement('output');
      output.setAttribute('for', this.suggestTarget.id);
      // Model output is untrusted text: never parse it as HTML.
      output.textContent = suggestion;

      const useButton = document.createElement('button');
      useButton.className = 'button button-small';
      useButton.type = 'button';
      useButton.dataset.action = `${this.identifier}#useSuggestion`;
      useButton.textContent = 'Use';

      // The text node keeps the inline gap between the output and the button.
      wrapper.append(output, ' ', useButton);
      this.outputArea.append(wrapper);
    }

    /**
     * Copy the clicked suggestion's text into the caption input.
     * @param {Event} event Click event from a suggestion's "Use" button.
     */
    useSuggestion(event) {
      if (!this.captionInput) return;
      const output = event.target
        .closest('.suggestion')
        ?.querySelector('output');
      if (!output) return;
      this.captionInput.value = output.textContent;
    }

    /**
     * Generate a caption for an image with the shared image-to-text pipeline.
     * @param {string} imageURL
     * @returns {Promise<string>}
     */
    async caption(imageURL) {
      const captioner = await AltTextController.captioner;
      return (await captioner(imageURL))[0].generated_text;
    }

    /**
     * Generate a caption for an image, then rewrite it with the shared
     * text-to-text pipeline using the surrounding form text as context.
     * @param {string} imageURL
     * @returns {Promise<string>}
     */
    async contextualCaption(imageURL) {
      const caption = await this.caption(imageURL);
      const text2text = await AltTextController.text2text;
      const { before, after } = this.textContext;
      // Enhance the caption to be more descriptive
      // using the text context from the form.
      const prompt = [
        '',
        `system: Change the following caption to be more descriptive: "${caption}"`,
        `system: Given this content shown before the image: ${before}`,
        `system: And this content shown after the image: ${after}`,
      ].join('\n');
      return (await text2text(prompt))[0].generated_text;
    }

    /**
     * Generate three suggestions for the current image and render each one
     * that succeeds. The suggest button is disabled and relabelled while
     * generation is in progress, and always restored afterwards.
     * @returns {Promise<void>}
     */
    async generate() {
      const button = this.suggestTarget;
      // A pre-existing suggest button may have no label element of its own.
      const label = button.lastElementChild ?? button;
      this.outputArea.replaceChildren(); // Clear previous output
      label.textContent = 'Generating…';
      button.disabled = true;
      try {
        const method = this.contextualValue
          ? this.contextualCaption
          : this.caption;
        const url = this.imageURL;
        await Promise.allSettled(
          Array.from({ length: 3 }, () =>
            method(url)
              .then((output) => this.renderSuggestion(output))
              .catch((error) => {
                console.error('Error generating suggestion:', error);
              }),
          ),
        );
      } finally {
        button.disabled = false;
        label.textContent = 'Generate suggestions';
      }
    }
  }
  // Register the controller with Wagtail's Stimulus application so that
  // elements with `data-controller="alt-text"` are wired up to it.
  {
    const identifier = 'alt-text';
    window.wagtail.app.register(identifier, AltTextController);
  }