import dompurify from 'dompurify';

import { NewsArticle } from '../models/newsArticlesModels';

// DOMPurify instance bound to the global `window`. Created once at module load
// and used below to sanitize the HTML from the CMS before it is parsed.
const domPurifier = dompurify(window);

/**
 * Converts the news HTML delivered by the CMS (Enonic) into `NewsArticle`
 * domain objects.
 *
 * Every element matching `*[data-type=col]` is treated as one article and is
 * expected to contain:
 * - an image element for the image,
 * - a heading element (h1, h2 or h3) for the header,
 * - paragraph elements for the description,
 * - an anchor element for the link.
 *
 * The heading and description are free-text fields in the CMS with no
 * validation, so an editor may enter HTML that breaks the parsing. An article
 * that fails to parse is logged and left out of the result; the other
 * articles are still returned. When that happens, either handle the case here
 * or correct the article's HTML in the CMS.
 *
 * @param html - HTML as returned by the CMS. It is sanitized before parsing.
 * @returns The articles that were parsed successfully.
 */
export const convertToArticles = (html: string): Array<NewsArticle> => {
  // Sanitize with an explicit attribute allow-list before the markup is
  // assigned to `innerHTML`.
  const cleanHtml = domPurifier.sanitize(html, {
    ALLOWED_ATTR: ['href', 'img-src'],
  });

  // Parse the markup in a detached container so it can be queried.
  const container = document.createElement('div');
  container.innerHTML = cleanHtml;

  const articleNodes = Array.from(
    container.querySelectorAll('*[data-type=col]'),
  );
  const articles: Array<NewsArticle> = [];

  for (const articleNode of articleNodes) {
    try {
      articles.push(convertNodeToArticleObject(articleNode));
    } catch (error) {
      // One invalid article must not take down the rest: report it and
      // continue with the next node.
      console.error('Failed to map news from the CMS to domain object.', error);
    }
  }

  return articles;
};

/**
 * Builds a `NewsArticle` from the DOM node of a single news article.
 *
 * @param node - Element containing the article's anchor, image, heading and
 *   paragraph elements.
 * @returns The link, image source, header and description paragraphs.
 * @throws Error when the anchor, the image, the image's `img-src` attribute
 *   or the heading is missing. The message names the missing part.
 */
const convertNodeToArticleObject = (node: Element): NewsArticle => {
  const linkNode = node.querySelector('a');
  if (linkNode === null) {
    throw new Error('News article is missing an anchor element.');
  }

  const imageNode = node.querySelector('img');
  if (imageNode === null) {
    throw new Error('News article is missing an image element.');
  }

  // The image source is carried in the custom `img-src` attribute.
  // `getAttribute` returns null when it is absent, which must not be passed
  // on as a string.
  const image = imageNode.getAttribute('img-src');
  if (image === null) {
    throw new Error('News article image is missing the img-src attribute.');
  }

  // Pick the heading by level, not by position: h1 wins over h2, h2 over h3.
  const headerNode =
    node.querySelector('h1') ||
    node.querySelector('h2') ||
    node.querySelector('h3');
  if (headerNode === null) {
    throw new Error('News article is missing a heading element (h1, h2 or h3).');
  }

  // One entry per paragraph; non-breaking space entities become plain spaces.
  const description = [...node.querySelectorAll('p')].map((paragraphNode) =>
    paragraphNode.innerHTML.replace(/&nbsp;/g, ' ').trim(),
  );

  return {
    link: linkNode.href,
    image,
    header: headerNode.innerHTML.trim(),
    description,
  };
};
