// NOTE(review): this template is empty in the reviewed source and nothing
// visible reads it. Presumably it holds icon markup for the suggest button —
// confirm before removing.
const icon = ``;

/**
 * Stimulus controller that suggests alt text (captions) for a chosen image.
 *
 * It renders a "suggest" button and an output area inside its element. When
 * `generate()` runs, it captions the image found in the element with an
 * image-to-text model and, when the `contextual` value is set, rewrites that
 * caption with a text-to-text model using the other text inputs of the form
 * as context.
 *
 * Values:
 * - `imageInput`: selector (resolved inside the element) of the image input.
 * - `captionInput`: selector (resolved inside the element) of the caption input.
 * - `contextual`: whether to enhance captions with the surrounding form text.
 */
class AltTextController extends window.StimulusModule.Controller {
  static targets = ['suggest'];

  static values = {
    imageInput: { default: '', type: String },
    captionInput: { default: '', type: String },
    contextual: { default: false, type: Boolean },
  };

  /** The transformers.js module, loaded once from a version-pinned CDN URL. */
  static #transformers = import(
    'https://cdn.jsdelivr.net/npm/@xenova/transformers@2.17.2'
  );

  /**
   * Promise of an image-to-text pipeline, shared between all instances of
   * this controller. Assigned synchronously so that awaiting it is always
   * safe, even before the library has finished loading.
   */
  static captioner = this.#transformers.then(({ pipeline }) =>
    pipeline('image-to-text', 'Mozilla/distilvit'),
  );

  /**
   * Promise of a text-to-text pipeline for enhancing captions, shared between
   * all instances of this controller.
   */
  static text2text = this.#transformers.then(({ pipeline }) =>
    pipeline('text2text-generation', 'Xenova/LaMini-Flan-T5-783M'),
  );

  /** Style sheet for the suggestions, created lazily and shared by all instances. */
  static #styleSheet;

  static {
    // Report a failed library/model load once. The rejection stays on the
    // shared promises, so `caption()` still fails and `generate()` logs it;
    // this handler only prevents unhandled-rejection errors at load time.
    Promise.all([this.captioner, this.text2text]).catch((error) => {
      console.error('Error loading alt text models:', error);
    });
  }

  /**
   * Convert an array of input elements into a single string,
   * concatenating their values or inner text.
   * Inputs whose text is empty or whitespace-only are skipped.
   * @param {Array} inputs
   * @returns {string} The concatenated text from the inputs, separated by blank lines
   */
  static inputsToText = (inputs) =>
    inputs
      .map((input) => input.value || input.innerText || '')
      .filter((text) => text.trim() !== '')
      .join('\n\n');

  /**
   * Escape text so it can be interpolated into an HTML string as plain text.
   * @param {string} text
   * @returns {string}
   */
  static #escapeHTML(text) {
    return String(text)
      .replaceAll('&', '&amp;')
      .replaceAll('<', '&lt;')
      .replaceAll('>', '&gt;')
      .replaceAll('"', '&quot;')
      .replaceAll("'", '&#39;');
  }

  /**
   * URL of the chosen image preview inside the element.
   * @returns {string} The image `src`, or an empty string when there is no image
   */
  get imageURL() {
    return this.element.querySelector('img[data-chooser-image]')?.src || '';
  }

  // Override only for JSDoc/typing purposes, not for functionality
  /** @returns {HTMLElement} */
  get element() {
    return super.element;
  }

  /**
   * All text inputs in the caption input's form, excluding the caption input
   * itself. Empty when there is no caption input or it is not inside a form.
   * @returns {Array}
   */
  get textInputs() {
    const form = this.captionInput?.form;
    if (!form) return [];
    return [
      ...form.querySelectorAll('input[type="text"], textarea, [role="textbox"]'),
    ].filter((input) => input !== this.captionInput);
  }

  /**
   * Text inputs in the form, grouped by their position
   * relative to the caption input (before/after).
   * Both keys are always present, even when a group has no inputs.
   * @returns {{
   *   before: Array,
   *   after: Array
   * }}
   */
  get textInputsContext() {
    const { captionInput } = this;
    const grouped = Object.groupBy(this.textInputs, (element) =>
      captionInput.compareDocumentPosition(element) &
      Node.DOCUMENT_POSITION_PRECEDING
        ? 'before'
        : 'after',
    );
    // `Object.groupBy` omits groups without members, so provide defaults.
    return { before: [], after: [], ...grouped };
  }

  /**
   * The text of the surrounding form inputs, split by position relative to
   * the caption input.
   * @returns {{ before: string, after: string }}
   */
  get textContext() {
    const { inputsToText } = AltTextController;
    // Read the grouping once; each access re-queries the DOM.
    const { before, after } = this.textInputsContext;
    return {
      before: inputsToText(before),
      after: inputsToText(after),
    };
  }

  /** Bind the methods that are passed around detached, then render the UI. */
  connect() {
    this.generate = this.generate.bind(this);
    // `generate()` calls these through a local variable, so they must be bound.
    this.caption = this.caption.bind(this);
    this.contextualCaption = this.contextualCaption.bind(this);
    this.renderFurniture();
  }

  /** Resolve the image input from the `imageInput` selector value. */
  imageInputValueChanged() {
    if (this.imageInputValue) {
      this.imageInput = this.element.querySelector(this.imageInputValue);
    } else {
      this.imageInput = null;
    }
    this.toggleSuggestTarget();
  }

  /** Resolve the caption input from the `captionInput` selector value. */
  captionInputValueChanged() {
    if (this.captionInputValue) {
      this.captionInput = this.element.querySelector(this.captionInputValue);
    } else {
      this.captionInput = null;
    }
  }

  /**
   * Enable the suggest button only while the image input has a value.
   * Does nothing when the button has not been rendered yet.
   * @param {Event} [event] When given, events from other elements are ignored
   */
  toggleSuggestTarget(event) {
    if (!this.hasSuggestTarget) return;
    if (event?.target && event.target !== this.imageInput) return;
    this.suggestTarget.disabled = !this.imageInput?.value;
  }

  /** Render the suggest button and output area, then sync the button state. */
  renderFurniture() {
    this.renderSuggestButton();
    this.renderOutputArea();
    this.toggleSuggestTarget();
  }

  /** Insert the suggest button into the element unless one already exists. */
  renderSuggestButton() {
    if (this.hasSuggestTarget) return;
    const prefix = this.element.closest('[id]').id;
    // NOTE(review): `buttonId` and the button markup look stripped from the
    // reviewed source (the template below is blank) — confirm the markup.
    const buttonId = `${prefix}-generate`;
    const button = /* html */ ` `;
    this.element.insertAdjacentHTML('beforeend', button);
  }

  /**
   * Append the container that holds generated suggestions and make sure the
   * suggestion styles are adopted by the document exactly once.
   */
  renderOutputArea() {
    if (!AltTextController.#styleSheet) {
      const css = new CSSStyleSheet();
      css.replaceSync(/* css */ ` .suggestion { display: block; margin-top: 0.5rem; margin-bottom: 0.5rem; border-radius: 0.25rem; padding: 0.5rem; background-color: lightblue; color: black; } `);
      AltTextController.#styleSheet = css;
    }
    // One shared sheet avoids adopting a duplicate per controller/connect.
    if (!document.adoptedStyleSheets.includes(AltTextController.#styleSheet)) {
      document.adoptedStyleSheets.push(AltTextController.#styleSheet);
    }

    // Reuse the existing output area when it is still in the element, so a
    // repeated `connect()` does not add a second one.
    if (this.outputArea && this.element.contains(this.outputArea)) return;
    this.outputArea = document.createElement('div');
    this.element.append(this.outputArea);
  }

  /**
   * Append one suggestion to the output area.
   * @param {string} suggestion Plain text produced by a model
   */
  renderSuggestion(suggestion) {
    const template = document.createElement('template');
    // The suggestion is model output, not markup: escape it before it is
    // parsed as HTML.
    // NOTE(review): the template markup looks stripped from the reviewed
    // source (only the interpolation remains) — confirm the markup.
    template.innerHTML = /* html */ `
${AltTextController.#escapeHTML(suggestion)}
`;
    // Fall back to the whole fragment when the template yields no element,
    // rather than appending the string "null".
    this.outputArea.append(
      template.content.firstElementChild ?? template.content,
    );
  }

  /**
   * Copy a suggestion into the caption input. The suggestion text is read
   * from the element immediately preceding the event target.
   * @param {Event} event
   */
  useSuggestion(event) {
    if (!this.captionInput) return;
    this.captionInput.value = event.target.previousElementSibling.textContent;
  }

  /**
   * Generate a caption for an image.
   * @param {string} imageURL
   * @returns {Promise<string>} The generated caption
   */
  async caption(imageURL) {
    const captioner = await AltTextController.captioner;
    return (await captioner(imageURL))[0].generated_text;
  }

  /**
   * Generate a caption for an image and enhance it with the text of the
   * surrounding form inputs.
   * @param {string} imageURL
   * @returns {Promise<string>} The enhanced caption
   */
  async contextualCaption(imageURL) {
    const caption = await this.caption(imageURL);
    const text2text = await AltTextController.text2text;
    const { before, after } = this.textContext;
    // Enhance the caption to be more descriptive
    // using the text context from the form.
    const prompt = ` system: Change the following caption to be more descriptive: "${caption}" system: Given this content shown before the image: ${before} system: And this content shown after the image: ${after}`;
    return (await text2text(prompt))[0].generated_text;
  }

  /**
   * Generate three suggestions for the current image and render each one as
   * soon as it is ready. The suggest button is disabled and relabelled while
   * generation is running. Failures are logged and do not stop the others.
   * @returns {Promise<void>}
   */
  async generate() {
    this.outputArea.innerHTML = ''; // Clear previous output
    this.suggestTarget.lastElementChild.textContent = 'Generating…';
    this.suggestTarget.disabled = true;

    const method = this.contextualValue ? this.contextualCaption : this.caption;
    const url = this.imageURL;

    await Promise.allSettled(
      Array.from({ length: 3 }, async () => {
        try {
          this.renderSuggestion(await method(url));
        } catch (error) {
          console.error('Error generating suggestion:', error);
        }
      }),
    );

    this.suggestTarget.disabled = false;
    this.suggestTarget.lastElementChild.textContent = 'Generate suggestions';
  }
}

window.wagtail.app.register('alt-text', AltTextController);