import { getSentences } from '@/utils/sentence-splitter';
import { distance } from 'fastest-levenshtein';

/**
 * Normalizes free text so two sentences can be compared loosely.
 *
 * The input is lowercased, every character that is not an ASCII letter, an
 * ASCII digit or whitespace is removed, each run of whitespace is collapsed to
 * a single space, and leading/trailing whitespace is trimmed.
 *
 * Whitespace is collapsed rather than deleted on purpose: `getNumberParts` is
 * run on the normalized text, and without word boundaries its pattern would
 * glue a number to every word that follows it (e.g. "2020 was great" would
 * yield the single token "2020wasgreat").
 *
 * @param text - Raw sentence or paragraph text.
 * @returns The normalized text; an empty string if nothing survives.
 */
export function normalizeText(text: string): string {
  return text
    .toLowerCase()
    .replace(/[^a-z0-9\s]+/g, '')
    // Keep one space between words so tokens stay separable.
    .replace(/\s+/g, ' ')
    .trim();
}

/**
 * Extracts the number-like tokens of `text`: runs that start with a digit and
 * consist of alternating groups of digits and lowercase ASCII letters.
 *
 * @param text - Text to scan; only lowercase `a-z` letters are treated as part of a token.
 * @returns The tokens in order of appearance, keeping only those of 5 or more characters.
 */
export function getNumberParts(text: string): string[] {
  const parts: string[] = [];
  for (const found of text.matchAll(/([0-9]+[a-z]*)+/g)) {
    const token = found[0];
    // Minimum 5 characters to not include things like years without more data
    if (token.length > 4) {
      parts.push(token);
    }
  }
  return parts;
}

/**
 * Tells whether the two token lists share at least one identical entry.
 *
 * @param aNumbers - Tokens to look up.
 * @param bNumbers - Tokens to look them up in.
 * @returns `true` if any element of `aNumbers` also occurs in `bNumbers`; `false` otherwise
 *          (including when either list is empty).
 */
export function checkNumberParts(aNumbers: string[], bNumbers: string[]): boolean {
  const lookup = new Set(bNumbers);
  return aNumbers.some((candidate) => lookup.has(candidate));
}

/** Records that a sentence of the chunk matched one of the answer sentences. */
interface SentenceMatch {
  /** Index of the paragraph (within the chunk) that contains the matched sentence. */
  paragraphIndex: number;
  /** Index of the matched sentence within its paragraph. */
  sentenceIndex: number;
  /**
   * Match score in the range (0, 1]: 1.0 for a shared number part or a direct
   * inclusion, otherwise `1 - levenshteinDistance / maxDistance`.
   */
  matchStrength: number;
}

/**
 * Looks for the first answer sentence that matches a given chunk sentence.
 *
 * Answer sentences are tried in order; for each one the checks run in this
 * sequence and the first hit ends the search:
 *   1. a shared number part (only when the chunk sentence has any) - strength 1.0
 *   2. either string containing the other - strength 1.0
 *   3. Levenshtein distance below the tolerance - strength `1 - distance / tolerance`
 *
 * @param normalizedChunkSentence - The chunk sentence, already normalized.
 * @param answerSentences - Normalized answer sentences to compare against.
 * @param chunksSentenceNumberParts - Number parts extracted from the chunk sentence.
 * @param paragraphIndex - Paragraph position, copied into the result.
 * @param sentenceIndex - Sentence position, copied into the result.
 * @returns The match for the first answer sentence that hits, or `null` if none does.
 */
function getSentenceMatches(
  normalizedChunkSentence: string,
  answerSentences: string[],
  chunksSentenceNumberParts: string[],
  paragraphIndex: number,
  sentenceIndex: number,
): SentenceMatch | null {
  const hasNumberParts = chunksSentenceNumberParts.length > 0;
  // Allowed edit distance: a tenth of the chunk sentence's length, but never below 5.
  const editTolerance = Math.max(normalizedChunkSentence.length / 10, 5);

  // Strength of the match against one answer sentence, or null when it does not match.
  const strengthAgainst = (candidate: string): number | null => {
    if (hasNumberParts && checkNumberParts(chunksSentenceNumberParts, getNumberParts(candidate))) {
      return 1.0;
    }

    const oneContainsOther =
      normalizedChunkSentence.includes(candidate) || candidate.includes(normalizedChunkSentence);
    if (oneContainsOther) {
      return 1.0;
    }

    const edits = distance(normalizedChunkSentence, candidate);
    return edits < editTolerance ? 1 - edits / editTolerance : null;
  };

  for (const candidate of answerSentences) {
    const matchStrength = strengthAgainst(candidate);
    if (matchStrength !== null) {
      return { paragraphIndex, sentenceIndex, matchStrength };
    }
  }

  return null;
}

/**
 * Decides whether a paragraph deserves highlighting given a list of matches.
 *
 * A paragraph qualifies when it holds a match stronger than 0.8 itself, or when
 * at least two matches stronger than 0.6 lie within `clusterThreshold`
 * paragraphs of it (matches in the paragraph itself count towards the two).
 *
 * NOTE(review): nothing in the visible part of this file calls this helper - confirm it is still needed.
 *
 * @param matches - All sentence matches to consider.
 * @param paragraphIndex - Index of the paragraph being judged.
 * @param clusterThreshold - Maximum paragraph distance for a match to count as nearby.
 * @returns `true` if the paragraph should be highlighted.
 */
function shouldHighlightParagraph(
  matches: SentenceMatch[],
  paragraphIndex: number,
  clusterThreshold: number = 2,
): boolean {
  let nearbyCount = 0;

  for (const candidate of matches) {
    const gap = Math.abs(candidate.paragraphIndex - paragraphIndex);

    // A strong match inside the paragraph itself settles it immediately.
    if (gap === 0 && candidate.matchStrength > 0.8) {
      return true;
    }

    if (gap <= clusterThreshold && candidate.matchStrength > 0.6) {
      nearbyCount += 1;
    }
  }

  // Otherwise the paragraph must sit inside a cluster of decent matches.
  return nearbyCount >= 2;
}

/**
 * Splits `chunk` into paragraphs (one per line) and flags the ones that appear
 * to back up `answerText`.
 *
 * Steps:
 *   1. Normalize the answer's sentences, ignoring those of 12 characters or fewer.
 *   2. Match every sufficiently long chunk sentence against them, grouping the
 *      matches by the chunk sentence's original text.
 *   3. For a sentence text that matched only once, highlight its paragraph. For
 *      one that matched several times, highlight only the occurrence whose
 *      paragraph is on average closest to the other matched paragraphs.
 *   4. Also highlight a direct neighbour of a highlighted paragraph when a
 *      different highlighted paragraph lies within two paragraphs of that neighbour.
 *
 * @param chunk - Source text; paragraphs are separated by '\n'.
 * @param answerText - Answer text whose sentences are searched for in the chunk.
 * @returns One entry per paragraph, in order, carrying the paragraph text and its highlight flag.
 */
export function getHighlightedText(
  chunk: string,
  answerText: string,
): Array<{ value: string; isHighlighted: boolean }> {
  const answerSentences: string[] = [];
  for (const rawAnswerSentence of getSentences(answerText)) {
    const normalizedAnswerSentence = normalizeText(rawAnswerSentence);
    if (normalizedAnswerSentence.length > 12) {
      answerSentences.push(normalizedAnswerSentence);
    }
  }

  const paragraphs = chunk.split('\n');

  // Matches grouped by the original chunk sentence text, plus every paragraph that had a match.
  const occurrencesBySentence = new Map<string, SentenceMatch[]>();
  const matchedParagraphs = new Set<number>();

  for (const [paragraphIndex, paragraph] of paragraphs.entries()) {
    for (const [sentenceIndex, sentence] of getSentences(paragraph).entries()) {
      const normalizedChunkSentence = normalizeText(sentence);
      if (normalizedChunkSentence.length <= 12) {
        continue;
      }

      const match = getSentenceMatches(
        normalizedChunkSentence,
        answerSentences,
        getNumberParts(normalizedChunkSentence),
        paragraphIndex,
        sentenceIndex,
      );
      if (!match) {
        continue;
      }

      const bucket = occurrencesBySentence.get(sentence);
      if (bucket) {
        bucket.push(match);
      } else {
        occurrencesBySentence.set(sentence, [match]);
      }
      matchedParagraphs.add(match.paragraphIndex);
    }
  }

  const sortedLocations = [...matchedParagraphs].sort((a, b) => a - b);

  // Mean paragraph distance from `at` to every other matched paragraph.
  // Evaluates to NaN when `at` is the only matched paragraph; the comparison
  // below is then false, so the first occurrence stays selected.
  const meanDistanceToOtherLocations = (at: number): number => {
    let total = 0;
    for (const location of sortedLocations) {
      if (location !== at) {
        total += Math.abs(location - at);
      }
    }
    return total / (sortedLocations.length - 1);
  };

  const paragraphsToHighlight = new Set<number>();
  for (const occurrences of occurrencesBySentence.values()) {
    // Buckets are only ever created with one element, so the first entry exists.
    let chosen = occurrences[0]!;

    if (occurrences.length > 1) {
      // The same sentence text matched more than once: keep the occurrence closest to the rest.
      let lowestMean = Infinity;
      for (const occurrence of occurrences) {
        const mean = meanDistanceToOtherLocations(occurrence.paragraphIndex);
        if (mean < lowestMean) {
          lowestMean = mean;
          chosen = occurrence;
        }
      }
    }

    paragraphsToHighlight.add(chosen.paragraphIndex);
  }

  // Context pass, driven by a snapshot of the paragraphs selected so far.
  const anchors = Array.from(paragraphsToHighlight).sort((a, b) => a - b);
  const hasOtherAnchorNear = (target: number, self: number): boolean =>
    anchors.some((anchor) => anchor !== self && Math.abs(anchor - target) <= 2);

  for (const anchor of anchors) {
    const before = anchor - 1;
    if (anchor > 0 && !paragraphsToHighlight.has(before) && hasOtherAnchorNear(before, anchor)) {
      paragraphsToHighlight.add(before);
    }

    const after = anchor + 1;
    if (
      anchor < paragraphs.length - 1 &&
      !paragraphsToHighlight.has(after) &&
      hasOtherAnchorNear(after, anchor)
    ) {
      paragraphsToHighlight.add(after);
    }
  }

  return paragraphs.map((value, index) => ({
    value,
    isHighlighted: paragraphsToHighlight.has(index),
  }));
}
