// noinspection NonAsciiCharacters

import type { WordBlock } from "@newpv/js-common"
import _ from "lodash"

import { removeAllMarkup } from "./parseSentence"

/**
 * Builds a single normalized, lower-cased string from a list of word blocks.
 *
 * Each block contributes `before + text + after` (missing `before`/`after`
 * count as empty). Blocks are separated by one space, except before the first
 * block and after any block whose `noSpaceAfter` flag is truthy.
 *
 * The joined string is passed through `removeAllMarkup` (imported from
 * `./parseSentence`; presumably strips markup — confirm there) and then
 * normalized: whitespace, apostrophe variants, dash variants and the œ/æ
 * ligatures (in either case) are mapped to plain ASCII equivalents before the
 * result is lower-cased.
 *
 * @param wordBlocks - the blocks to join; may be omitted
 * @returns the normalized sentence, or `""` when `wordBlocks` is not provided
 */
export const generateSentence = (wordBlocks?: WordBlock[]): string => {
  if (!wordBlocks) {
    return ""
  }
  const initialString = wordBlocks.reduce(
    (accumulatedSentence, currentWordBlock, currentIndex, blocks) => {
      // no separator before the first block, nor after a block that opts out of it
      const separator = currentIndex === 0 || blocks[currentIndex - 1]?.noSpaceAfter ? "" : " "
      return (
        accumulatedSentence +
        separator +
        (currentWordBlock.before ?? "") +
        currentWordBlock.text +
        (currentWordBlock.after ?? "")
      )
    },
    "",
  )

  // lodash's `replace` is not chainable: the first call below already yields a
  // plain string, so every following call is a native String method.
  // noinspection UnnecessaryLocalVariableJS
  const cleanedString = _(removeAllMarkup(initialString))
    // replace every whitespace character matched by \s with a regular space \u0020
    // https://www.compart.com/en/unicode/category/Zs
    .replace(/\s/g, "\u0020")
    // replace apostrophes by regular apostrophes \u0027
    // https://www.compart.com/en/unicode/U+0027
    .replace(/[\uFF07\u2019\u02BC]/g, "\u0027")
    // dashes, replaced by hyphen-minus \u002d
    // https://www.compart.com/en/unicode/category/Pd
    .replace(/[\u002D\u1806\u2011\u2012\u2013\u2014\ufe58\ufe63\uff0d]/g, "\u002d")
    // replace ligatures Œ/œ by "oe" and Æ/æ by "ae"; the upper-case forms must be
    // matched here too, otherwise toLowerCase() below turns them back into the
    // lower-case ligatures and they survive in the output
    // https://www.compart.com/en/unicode/U+0153
    // https://www.compart.com/en/unicode/U+00E6
    .replace(/[\u0152\u0153]/g, "oe")
    .replace(/[\u00c6\u00e6]/g, "ae")
    .toLowerCase()

  return cleanedString
}
