import { isLangCode } from 'is-language-code';

import makeLogger from './makeLogger';

// Module-level logger used for the warnings emitted below. It is created from this
// file's path (presumably so log output can be attributed to it — confirm in ./makeLogger).
const logger = makeLogger(__filename);

/**
 * Breaks `input` into an array of words.
 *
 * The input is trimmed first. When `Intl.Segmenter` is present on the global object it is
 * tried with word granularity, and only the segments it flags as word-like are returned.
 * The function falls back to a regex-based split when:
 *  - `Intl.Segmenter` is not available,
 *  - a non-empty `languageCode` is rejected by `isLangCode` (a warning is logged),
 *  - constructing or running the segmenter throws (a warning is logged), or
 *  - the segmenter yields no word-like segments at all.
 *
 * The fallback splits on a plain space and on a fixed set of characters. Because the split
 * pattern is wrapped in a capturing group, the matched separators are part of the split
 * result; only pieces that are empty or whitespace-only are discarded, so non-whitespace
 * separators (for example a character in the U+4E00–U+9FCC range) end up as their own entries.
 *
 * @param input - Text to split.
 * @param languageCode - Locale passed to `Intl.Segmenter`; the literal `'unknown'` means
 *   "no locale", in which case the segmenter is constructed with `undefined`.
 * @returns The words found, in order of appearance; an empty array when there are none.
 */
export default function getWords(input: string, languageCode: string | 'unknown'): string[] {
  const text = input.trim();
  const locale = languageCode === 'unknown' ? undefined : languageCode;

  const hasSegmenter = 'Intl' in global && 'Segmenter' in global.Intl;

  if (hasSegmenter) {
    // Only a non-empty locale is validated; an absent or empty one goes straight to the segmenter.
    if (locale && !isLangCode(locale).res) {
      logger.warn("languageCode given isn't valid, falling back to regex");
    } else {
      try {
        // tsc reports Segmenter as undefined here and no proper fix was found, hence the ignore.
        // eslint-disable-next-line @typescript-eslint/ban-ts-comment
        // @ts-ignore
        const wordSegmenter = new Intl.Segmenter(locale, { granularity: 'word' });

        const wordLikePieces: string[] = [];
        for (const piece of wordSegmenter.segment(text)) {
          if (piece.isWordLike) {
            wordLikePieces.push(piece.segment);
          }
        }

        // Only trust the segmenter when it recognised at least one word;
        // otherwise continue to the regex-based split below.
        if (wordLikePieces.length > 0) {
          return wordLikePieces;
        }
      } catch (e) {
        logger.warn('Intl.Segmenter threw an error;', { e });
      }
    }
  }

  // Regex fallback: a space plus a fixed list of separator patterns.
  const separators: (string | RegExp)[] = [
    ' ',
    // eslint-disable-next-line no-misleading-character-class
    /[\u00B7\u02BB\u02BD\u0312-\u0315\u0326\u055D\u060C\u07F8\u1363\u1802\u1808\u201A\u2E32\u2E34\u2E41\u2E49\u3001\uA4FE\uA60D\uA6F5\uFE10\uFE11\uFE50\uFE51\uFF0C\uFF1B\uFF64]/,
    /[\u4e00-\u9fcc]/,
    /\u2014/,
    /\uD805\uDC4D/,
    /\uD836\uDE87/,
  ];
  const alternation = separators
    .map((separator) => (separator instanceof RegExp ? separator.source : separator))
    .join('|');
  // The capturing group makes split() include the matched separators in its result.
  const splitter = new RegExp(`(${alternation})`, 'g');

  const words: string[] = [];
  for (const piece of text.split(splitter)) {
    // Discard empty and whitespace-only pieces; everything else counts as a word.
    if (piece.trim()) {
      words.push(piece);
    }
  }
  return words;
}
