import type { DocumentChunkMapByFilename } from '../../types/chunkedDocuments';
import { notEmpty } from '../../typeValidators';
import makeLogger from '../../utils/makeLogger';
import { createDataUrl } from './createDataUrl';
import { resolveRelativePath, unpackChunkWithFilename } from './getChunkContentFromFilename';

// Module-level logger, created by passing this file's path to makeLogger.
const logger = makeLogger(__filename);

/**
 * Replaces the image URLs referenced by `html` with inline data URLs.
 *
 * Every URL returned by `extractImageUrls` is resolved through `processImageSource`.
 * URLs for which no data URL is produced are left untouched. Every textual occurrence
 * of a resolved URL in `html` is substituted, not only the ones inside `<img>` attributes.
 *
 * The substitution is done in a single pass over the original markup so that:
 * - a URL that is a substring of another URL (`img.png` vs `big-img.png`) cannot
 *   corrupt the longer one,
 * - data URLs that were just inserted are never re-scanned for further matches,
 * - `$`-sequences inside a data URL are inserted literally instead of being
 *   interpreted as replacement patterns.
 *
 * @param html - The HTML markup to rewrite.
 * @param imageChunksByFilename - Chunk map handed to `processImageSource` for lookup.
 * @param containerFilename - Filename handed to `processImageSource`, which resolves
 *   relative image paths against it.
 * @returns The HTML with every resolvable image URL replaced by its data URL.
 */
export async function inlineImages(
  html: string,
  imageChunksByFilename: DocumentChunkMapByFilename,
  containerFilename: string,
): Promise<string> {
  const imageUrls = extractImageUrls(html);

  const resolved = await Promise.all(imageUrls.map(async (url) => {
    const dataUrl = await processImageSource(url, imageChunksByFilename, containerFilename);
    return { original: url, dataUrl };
  }));

  const dataUrlByOriginal = new Map<string, string>();
  resolved.forEach(({ original, dataUrl }) => {
    if (dataUrl) {
      dataUrlByOriginal.set(original, dataUrl);
    }
  });

  // Nothing could be resolved: an empty alternation would match everywhere, so bail out.
  if (dataUrlByOriginal.size === 0) {
    return html;
  }

  // Longest first, so that at any position the most specific URL wins the alternation.
  const alternation = Array.from(dataUrlByOriginal.keys())
    .sort((a, b) => b.length - a.length)
    .map((url) => url.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'))
    .join('|');

  // A function replacer inserts the data URL verbatim (no `$&` / `$1` expansion).
  return html.replace(
    new RegExp(alternation, 'g'),
    (matched) => dataUrlByOriginal.get(matched) ?? matched,
  );
}

/**
 * Turns a single image reference into a data URL.
 *
 * Empty references and references that already are `data:` URLs are skipped.
 * Otherwise the reference is resolved against `containerFilename`, the matching
 * chunk is unpacked, and its data is passed to `createDataUrl`.
 *
 * @param filename - The image reference as it appears in the HTML.
 * @param imageChunksByFilename - Chunk map used to look up the resolved filename.
 * @param containerFilename - Filename the reference is resolved against.
 * @returns The data URL, or `undefined` when the reference is skipped or the
 *   unpacked chunk is missing or has no data (a warning is logged in that case).
 */
async function processImageSource(
  filename: string,
  imageChunksByFilename: DocumentChunkMapByFilename,
  containerFilename: string,
): Promise<string | undefined> {
  if (filename && !filename.startsWith('data:')) {
    const resolvedPath = resolveRelativePath(containerFilename, filename);
    const chunk = await unpackChunkWithFilename(resolvedPath, imageChunksByFilename);

    if (chunk?.data) {
      // The reference as written in the HTML (not the resolved path) goes to createDataUrl.
      return createDataUrl(filename, chunk.data);
    }

    logger.warn('Chunk has no data', { chunkId: chunk?.id });
  }

  return undefined;
}


// A data URL already carries its content inline, so there is nothing to replace:
// such URLs (and missing/empty ones) map to undefined, everything else passes through.
const filterDataUrl = (url: string | undefined) => {
  if (!url) {
    return undefined;
  }
  return url.startsWith('data:') ? undefined : url;
};

/**
 * Extraction rules for image URLs in an HTML string.
 *
 * Each entry pairs a global, case-insensitive regex with a selector that turns one
 * match into zero, one, or several URLs. Attribute values may be wrapped in single
 * or double quotes. `data:` URLs are dropped by the selectors.
 *
 * NOTE(review): `[^>]*src=` also accepts attribute names that merely end in `src`
 * (e.g. `data-src`), and srcset candidates are split on every comma, so a URL that
 * itself contains a comma is split apart.
 */
const patterns: [RegExp, (match: RegExpMatchArray) => string | string[] | undefined][] = [
  // <img ... src="...">: one URL per match
  [
    /<img\s+[^>]*src=["']([^"'>]+)["']/gi,
    ([, src]) => filterDataUrl(src),
  ],
  // <img ... srcset="...">: comma-separated candidates, the URL is the first token of each
  [
    /<img\s+[^>]*srcset=["']([^"']+)["']/gi,
    ([, srcset]) => {
      const urls: string[] = [];
      for (const candidate of srcset.split(',')) {
        const [firstToken] = candidate.trim().split(/\s+/);
        const url = filterDataUrl(firstToken);
        if (url !== undefined) {
          urls.push(url);
        }
      }
      return urls;
    },
  ],
];

/**
 * Collects the image URLs referenced by an HTML string.
 *
 * Every entry of `patterns` is applied to `html`; the URLs its selector yields are
 * passed through `notEmpty` and de-duplicated, keeping first-seen order.
 *
 * @param html - The HTML string to scan.
 * @returns The distinct image URLs found, in order of first appearance.
 */
export function extractImageUrls(html: string): string[] {
  const seen = new Set<string>();

  for (const [regex, selector] of patterns) {
    const found = Array.from(html.matchAll(regex))
      .flatMap((match) => selector(match))
      .filter(notEmpty);

    found.forEach((url) => seen.add(url));
  }

  return Array.from(seen);
}
