import { stripUnderscores } from '@utils/content-string-utils';
import { getElementEditableContentString } from '../content-utils';
import {
  basicScriptElementsFromEpisodeData,
  loadScriptEpisodeData,
} from '../editorial/db/loader-funcs';
import { deploymentConfig } from '@masala-lib/deployment-config';
import {
  ExtractETForKT,
  KTOf,
  Matchers,
  ScriptElement,
  ScriptLine,
  ScriptLineMatcher,
  ReferenceNumMatcher,
  ScriptElementKind,
  ElementCategory,
  TimesLookup,
} from './llm-types';
import { CreateChatCompletionResponse } from 'openai';
import { epochSecondsFloat, randomString } from '../utils';
import { splitLines } from '@tikka/misc/string-utils';
import {
  EpisodeStructuralDoc,
  EpisodeTranslationDoc,
  EpisodeWordGroupDoc,
} from '../editorial/db/firestore-doc-types';
import {
  ElementIdToTranslation,
  PointAnchor,
  StorageStructural,
  StorageTranslation,
  StorageWordGroup,
  StructuralVersionData,
  Translation,
} from '../editorial-types';
import { getTranslationId } from '@tikka/elements/element-id-utils';
import { DbPaths } from '../editorial/db/db';
import { db } from '@platform/firebase-init';
import {
  normalizeSpecialChars,
  strongNormalizeWordArray,
} from '@masala-lib/misc/editorial-string-utils';
import {
  ElementId,
  IndexRange,
  NO_INDEX,
  idIsOfKind,
} from '@tikka/basic-types';
import {
  ImportScript,
  ProjectTask,
  IdToElement,
  ScriptOptions,
  FlagsData,
  NotesData,
  LintAlert,
} from './project/llm-project-types';
import { ConversationManager } from '@masala-lib/editorial/models/conversation-manager';
import { loaderStatus } from '@masala-lib/firestore-db/constants';
import { when } from 'mobx';
import {
  getMasalaFlag,
  getSamosaFlag,
  structuralTaskComputeElementKeys,
  translationTaskComputeElementKeys,
  vocabTaskComputeElementKeys,
} from './project/llm-project-funcs';
import { computeElementsTimeRanges } from '@masala-lib/editorial/episode-data/episode-data';
import { alertError } from '@app/notification-service';
import { has, isEmpty } from 'lodash';
import {
  loadStructuralDoc,
  loadStructuralVersionsDoc,
} from '@masala-lib/editorial/db/versions-db';
import { computeFilteredVersionsDoc } from '@masala-lib/editorial/db/versions-update';
import { zorchWordGroups } from '@masala-lib/editorial/db/mutation-actions';

/** Returns the current Unix time in whole seconds (fractional part dropped). */
export function epochSeconds() {
  const nowMillis = Date.now();
  return Math.floor(nowMillis / 1000);
}

/**
 * Non-cryptographic string hash.
 *
 * Mixes every UTF-16 code unit of `str` into two 32-bit accumulators and
 * combines them into a single non-negative integer below 2^53, so the result
 * is always exactly representable as a JS number.
 *
 * @param str  text to hash
 * @param seed optional seed XOR-ed into both accumulators
 */
export const cyrb53a = (str: string, seed = 0) => {
  let low = 0xdeadbeef ^ seed;
  let high = 0x41c6ce57 ^ seed;

  let position = 0;
  while (position < str.length) {
    const codeUnit = str.charCodeAt(position);
    low = Math.imul(low ^ codeUnit, 0x85ebca77);
    high = Math.imul(high ^ codeUnit, 0xc2b2ae3d);
    position += 1;
  }

  // final avalanche: each half is folded into the other
  low = low ^ Math.imul(low ^ (high >>> 15), 0x735a2d97);
  high = high ^ Math.imul(high ^ (low >>> 15), 0xcaf649a9);
  low = low ^ (high >>> 16);
  high = high ^ (low >>> 16);

  // 2097152 === 2^21: the full unsigned high word plus the top 21 bits of the low word
  return 2097152 * (high >>> 0) + (low >>> 11);
};

/**
 * Returns the items of `list` whose `kind` equals `kind`, typed as the
 * matching member of the element union.
 */
export function filterOnKind<T extends { kind: string }, KT extends KTOf<T>>(
  list: T[],
  kind: KT
): ExtractETForKT<T, KT>[] {
  const hasRequestedKind = (candidate: T): boolean => candidate.kind === kind;
  // the cast narrows the element type; the runtime values are untouched
  return list.filter(hasRequestedKind) as any;
}

/** Returns the items of `list` whose `kind` is one of `kinds`, in input order. */
export function filterOnKinds<T extends { kind: string }>(
  list: T[],
  kinds: string[]
): T[] {
  const wantedKinds = new Set(kinds);
  return list.filter(candidate => wantedKinds.has(candidate.kind));
}

/**
 * True when `el` originates from MASALA and its kind is one of the task's
 * reference kinds. A missing element is never a reference.
 */
export function elementIsReference(
  el: ScriptElement,
  referenceKinds: ScriptElementKind[]
) {
  const fromMasala = el?.origin === 'MASALA';
  return fromMasala && referenceKinds.includes(el.kind);
}

/**
 * True when `el` did NOT originate from MASALA and its kind is one of the
 * task's output kinds. A missing element is never an output.
 */
export function elementIsOutput(
  el: ScriptElement,
  outputKinds: ScriptElementKind[]
) {
  if (el) {
    const producedOutsideMasala = el.origin !== 'MASALA';
    return producedOutsideMasala && outputKinds.includes(el.kind);
  }
  return false;
}

/**
 * True when `el` originates from MASALA but has one of the task's output
 * kinds. A missing element is never a comparison.
 */
export function elementIsComparison(
  el: ScriptElement,
  outputKinds: ScriptElementKind[]
) {
  return el ? el.origin === 'MASALA' && outputKinds.includes(el.kind) : false;
}

/**
 * True when `el` originates from MASALA and its kind is neither a reference
 * kind nor an output kind. A missing element is never a decorator.
 */
export function elementIsDecorator(
  el: ScriptElement,
  referenceKinds: ScriptElementKind[],
  outputKinds: ScriptElementKind[]
) {
  if (!el) {
    return false;
  }
  if (el.origin !== 'MASALA') {
    return false;
  }
  const claimedByTask =
    referenceKinds.includes(el.kind) || outputKinds.includes(el.kind);
  return !claimedByTask;
}

/**
 * Classifies an element relative to a task's reference and output kinds.
 *
 * - missing element: 'UNDEFINED'
 * - MASALA origin: 'REFERENCE', else 'COMPARISON', else 'DECORATOR'
 * - any other origin: 'OUTPUT' when its kind is an output kind
 *
 * @throws Error when a non-MASALA element does not have an output kind
 */
export function elementCategory(
  el: ScriptElement,
  referenceKinds: ScriptElementKind[],
  outputKinds: ScriptElementKind[]
): ElementCategory {
  if (!el) {
    return 'UNDEFINED';
  }

  if (el.origin !== 'MASALA') {
    if (elementIsOutput(el, outputKinds)) {
      return 'OUTPUT';
    }
    throw Error('unrecognized element category');
  }

  if (elementIsReference(el, referenceKinds)) {
    return 'REFERENCE';
  }
  return elementIsComparison(el, outputKinds) ? 'COMPARISON' : 'DECORATOR';
}

/**
 * Compiles the regex strings of `matchers` in place.
 *
 * Each line matcher gets `beginLineCompiledRegex`, anchored at line start and
 * capturing the remainder of the line as the named group `text`. The
 * reference-number matcher gets `referenceNumCompiledRegex`, in which the
 * first `\d+` of the source pattern becomes the named group `number`, again
 * followed by a `text` group.
 *
 * @throws Error if the reference-number matcher already holds a compiled regex
 */
export function compileMatchers(matchers: Matchers) {
  const { referenceNumMatcher, lineMatchers } = matchers;
  if (referenceNumMatcher.referenceNumCompiledRegex) {
    throw Error('Matchers already compiled');
  }

  const restOfLine = '(?<text>.*)';
  lineMatchers.forEach(lineMatcher => {
    lineMatcher.beginLineCompiledRegex = new RegExp(
      '^' + lineMatcher.beginLineRegexString + restOfLine
    );
  });

  // string (not regex) replace: only the first literal `\d+` is wrapped
  const numberPattern = referenceNumMatcher.referenceNumRegexString.replace(
    '\\d+',
    '(?<number>\\d+)'
  );
  referenceNumMatcher.referenceNumCompiledRegex = new RegExp(
    '^' + numberPattern + restOfLine
  );
}

/**
 * Content hash of an element: the base-36 rendering of `cyrb53a` applied to
 * the concatenation of its kind, reference and text.
 */
export function hashForElement(el: ScriptElement): string {
  const { kind, reference, text } = el;
  const digest = cyrb53a(kind + reference + text);
  return digest.toString(36);
}

/** Recomputes the content hash of `el` and stores it on `el.hash` (mutates `el`). */
export function recomputeHashForElement(el: ScriptElement) {
  const refreshedHash = hashForElement(el);
  el.hash = refreshedHash;
}

/**
 * Keeps the first element seen for each distinct `hash`, preserving input
 * order. Elements without a hash (null, undefined, empty) are dropped.
 */
export function filterUniqueHashes(els: ScriptElement[]): ScriptElement[] {
  const seenHashes = new Set<string>();
  return els.filter(({ hash }) => {
    if (!hash || seenHashes.has(hash)) {
      return false;
    }
    seenHashes.add(hash);
    return true;
  });
}

/**
 * Reports whether the unit already holds content of the sort the given task
 * would produce.
 *
 * - translation / subtask 'transcript': any translation attached to a SENTENCE
 * - translation / other subtasks: any translation attached to a CHAPTER or PASSAGE
 * - structural: any CHAPTER or PASSAGE element
 * - vocab: any WORD_GROUP element with subKind 'VOCAB'
 *
 * @throws Error for any other task (after the episode data has been loaded)
 */
export async function hasExistingContent(
  unitId: string,
  params: {
    task: ProjectTask;
    subtask: string; // will likely want a wider range of metadata props
  }
): Promise<boolean> {
  const { task, subtask } = params;
  const episodeData = await loadScriptEpisodeData(unitId);
  const elements = basicScriptElementsFromEpisodeData(episodeData).values;
  const outputStructuralKinds = ['CHAPTER', 'PASSAGE'];

  if (task === 'translation') {
    const existingTranslations = Object.values(episodeData.translations);
    if (subtask === 'transcript') {
      // TODO is assuming that if any translations found then there are translations for sentences, valid in our process?
      return existingTranslations.some(translation =>
        idIsOfKind(translation.elementId, 'SENTENCE')
      );
    }
    // structural
    return existingTranslations.some(translation =>
      outputStructuralKinds.some(kind =>
        idIsOfKind(translation.elementId, kind)
      )
    );
  }

  if (task === 'structural') {
    return elements.some(element =>
      outputStructuralKinds.includes(element.kind)
    );
  }

  if (task === 'vocab') {
    return elements.some(
      element => element.kind === 'WORD_GROUP' && element.subKind === 'VOCAB'
    );
  }

  throw Error('unrecognized task');
}

/**
 * Loads the reference script element data for a unit from the main masala
 * data store and converts it into `ScriptElement`s for the given task.
 *
 * Numbering: an element's `reference` is `index + 1`, where `index` only
 * advances past sentences, past every non-speaker-label element for the
 * 'translation' task, and past every element for the 'freeform' task. An
 * element that does not advance the index therefore shares its reference
 * with the next element that does.
 *
 * Filtering: empty PARAGRAPH elements are skipped; remaining PARAGRAPHs are
 * emitted as SPEAKER_LABEL. Subtask 'transcript' drops chapters and passages,
 * subtask 'structural' drops sentences and speaker labels. Dropped kinds are
 * still counted for freeform handles, and dropped chapters still contribute
 * a section boundary.
 *
 * Player data (word times, audio URL) is fetched best-effort: on any failure,
 * including a non-2xx HTTP status, an alert is raised and the function
 * continues with `timesLookup` and `audioUrl` left as null.
 *
 * @returns tuple of
 *   [reference elements, existing-translation elements, section boundaries
 *   (the reference of each chapter plus a final `index + 1`), times lookup,
 *   audio URL]
 * @throws Error('unrecognized task') for a task with no key computation
 * @throws Error('unrecognized kind') for an unexpected kind in freeform mode
 */
export async function fetchReferenceScriptData(
  unitId: string,
  params: {
    task: ProjectTask;
    subtask: string; // will likely want a wider range of metadata props
  }
): Promise<[ScriptElement[], ScriptElement[], number[], TimesLookup, string]> {
  const { task, subtask } = params;
  const kindCounts: Map<string, number> = new Map();
  const episodeData = await loadScriptEpisodeData(unitId);
  const allContent = basicScriptElementsFromEpisodeData(episodeData);
  const content = allContent.filterByKinds([
    'SENTENCE',
    'CHAPTER',
    'PASSAGE',
    'PARAGRAPH',
  ]);

  let wordTimes: TimesLookup = null;
  let audioUrl: string = null;
  try {
    const playerDataUrl = `${deploymentConfig.masalaServerUrl}/simple_player_data?key=${unitId}`;
    const resp = await fetch(playerDataUrl);
    // fetch() only rejects on network failure; surface HTTP errors explicitly
    if (!resp.ok) {
      throw Error(`HTTP ${resp.status} ${resp.statusText}`);
    }
    const playerData = await resp.json();
    wordTimes = playerData.wordTimes;
    audioUrl = playerData.audioUrl;
  } catch (error) {
    alertError(`Error fetching player data: ${error}`);
  }
  let timesLookup: TimesLookup = null;
  if (wordTimes) {
    // TODO cleanup typing
    timesLookup = computeElementsTimeRanges(content as any, wordTimes);
  }

  let excludedKinds: string[] = [];
  switch (subtask) {
    case 'transcript':
      excludedKinds = ['CHAPTER', 'PASSAGE'];
      break;
    case 'structural':
      excludedKinds = ['SENTENCE', 'SPEAKER_LABEL'];
      break;
    default:
    // no implicit filtering for exploratory projects
  }

  const words = content.words;
  const existingTranslations = episodeData.translations;

  const referenceElements: ScriptElement[] = [];
  const translationElements: ScriptElement[] = [];
  const timestamp = epochSeconds();
  const sectionBoundaries: number[] = [];
  let index = 0;
  for (const element of content.values) {
    if (element.kind === 'PARAGRAPH' && !element.content?.text) {
      continue;
    }

    const text = stripUnderscores(
      getElementEditableContentString(element, words)
    );
    const id = element.id;
    const kind = element.kind === 'PARAGRAPH' ? 'SPEAKER_LABEL' : element.kind;
    const reference = index + 1;
    let handle = null;
    if (task === 'freeform') {
      // handles are numbered per kind and counted before the exclusion check
      const kindCount = (kindCounts.get(kind) || 0) + 1;
      switch (kind) {
        case 'CHAPTER':
          handle = `[Chapter ${kindCount}] `;
          break;
        case 'PASSAGE':
          handle = `[Passage ${kindCount}] `;
          break;
        case 'SPEAKER_LABEL':
          handle = '';
          break;
        case 'SENTENCE':
          handle = `[${kindCount}] `;
          break;
        default:
          throw Error('unrecognized kind');
      }
      kindCounts.set(kind, kindCount);
    }

    // recorded before the exclusion check so excluded chapters still mark sections
    if (kind === 'CHAPTER') {
      sectionBoundaries.push(reference);
    }

    if (excludedKinds.includes(kind)) {
      continue;
    }

    if (
      kind === 'SENTENCE' ||
      (task === 'translation' && kind !== 'SPEAKER_LABEL') ||
      task === 'freeform'
    ) {
      index++;
    }

    const el: ScriptElement = {
      kind,
      id,
      origin: 'MASALA',
      anchor: element.anchor,
      reference,
      handle,
      claimedSlot: null,
      slots: null,
      groupKey: null,
      text,
      timestamp,
      hash: null,
    };
    computeElementKeysForTask(task, el);
    // recomputeHashForElement(el);
    referenceElements.push(el);

    const existingTranslation = existingTranslations[id];
    if (existingTranslation) {
      const translation: ScriptElement = {
        kind: 'TRANSLATION',
        id: existingTranslation.id,
        origin: 'MASALA',
        reference,
        claimedSlot: null,
        slots: null,
        groupKey: null,
        text: existingTranslation.content as string,
        timestamp,
        hash: null,
      };
      // recomputeHashForElement(translation);
      // TODO does the vocab key computation even make sense for a translation?
      computeElementKeysForTask(task, translation);
      translationElements.push(translation);
    }
  }
  sectionBoundaries.push(index + 1);
  // todo: return a map!
  return [
    referenceElements,
    translationElements,
    sectionBoundaries,
    timesLookup,
    audioUrl,
  ];
}

// Applies the key computation that belongs to `task` to one script element.
function computeElementKeysForTask(task: ProjectTask, el: ScriptElement): void {
  switch (task) {
    case 'translation':
      translationTaskComputeElementKeys(el);
      break;
    case 'structural':
    case 'freeform':
      structuralTaskComputeElementKeys(el);
      break;
    case 'vocab':
      vocabTaskComputeElementKeys(el);
      break;
    default:
      throw Error('unrecognized task');
  }
}

// Markdown-style prefix placed in front of a rendered script line, keyed by
// element kind: chapters render as `# ` headings, passages as `## `,
// translations as `> ` block quotes; sentences and speaker labels get none.
// The chapter/passage/translation prefixes mirror the `beginLineRegexString`
// values in `matchers`, which is what lets generated text be scanned back in.
// NOTE(review): exported as a mutable `let` and cast to `any`; nothing in the
// visible code reassigns it, so `const` with a precise type is probably safe —
// confirm against the rest of the module before changing.
// todo: refactor to use task type and exclude prefix for structural when structural
// also make sure used by structural interleaved merge view
export let kindMarkdownPrefix = {
  CHAPTER: '# ',
  PASSAGE: '## ',
  SENTENCE: '',
  SPEAKER_LABEL: '',
  TRANSLATION: '> ',
} as any;

/**
 * Filters a reference script by the per-kind visibility flags in `options`
 * and, when `options.sub_translations` is set, presents each element with its
 * translation text in place of the original text.
 *
 * The input elements are never modified: an element whose text is substituted
 * is returned as a shallow copy, every other element is returned as-is.
 * (Previously the substitution was written back onto the caller's element,
 * so the original text was lost for every later use of the same script.)
 *
 * @param referenceScript        elements to filter, in display order
 * @param elementIdToTranslation translation elements keyed by source element id
 * @param options                visibility flags plus `sub_translations`
 * @returns the visible elements, in input order
 */
export function filterReferenceWithScriptOptions(
  referenceScript: ScriptElement[],
  elementIdToTranslation: IdToElement,
  options: ScriptOptions
) {
  const {
    chapters,
    passages,
    speakers,
    sentences,
    translations,
    sub_translations,
  } = options;

  // kinds switched off by the options
  const hiddenKinds = new Set<string>();
  if (!chapters) hiddenKinds.add('CHAPTER');
  if (!passages) hiddenKinds.add('PASSAGE');
  if (!speakers) hiddenKinds.add('SPEAKER_LABEL');
  if (!sentences) hiddenKinds.add('SENTENCE');
  if (!translations) hiddenKinds.add('TRANSLATION');

  const result: ScriptElement[] = [];
  for (const el of referenceScript) {
    if (hiddenKinds.has(el.kind)) {
      continue;
    }
    // always present english in merge view for structural work
    const translatedText = sub_translations
      ? elementIdToTranslation[el.id]?.text
      : undefined;
    // elements without a (non-empty) translation keep their original text
    result.push(translatedText ? { ...el, text: translatedText } : el);
  }
  return result;
}

/**
 * Renders a reference script as the text block that is sent to the LLM.
 *
 * Each visible element becomes one line made of its markdown prefix, an
 * optional reference tag and its text, followed by a blank line. A
 * TRANSLATION line is attached directly beneath the preceding line (the
 * blank separator before it is removed). Speaker labels get a trailing `:`.
 *
 * Reference tags: `[n]` when `options.numbers` is set, except on speaker
 * labels, translations and, for the 'structural' task, chapters and passages.
 * For the 'freeform' task the element's `handle` is used instead.
 *
 * An element with no text to show (for example no translation while
 * `sub_translations` is on) contributes only its blank separator line. This
 * includes speaker labels, which previously rendered as the literal line
 * `undefined:` in that situation.
 *
 * @returns the rendered lines joined with `\n`, with a trailing newline
 */
export function getLlmReferenceTextForTranslation(
  referenceScript: ScriptElement[],
  elementIdToTranslation: IdToElement,
  options: ScriptOptions,
  task: ProjectTask
): string {
  const { chapters, passages, speakers, sentences, numbers, translations } =
    options;
  const lines: string[] = [];
  for (const el of referenceScript) {
    if (!chapters && el.kind === 'CHAPTER') {
      continue;
    }
    if (!passages && el.kind === 'PASSAGE') {
      continue;
    }
    if (!speakers && el.kind === 'SPEAKER_LABEL') {
      continue;
    }
    if (!sentences && el.kind === 'SENTENCE') {
      continue;
    }
    if (!translations && el.kind === 'TRANSLATION') {
      continue;
    }
    const showNumber =
      numbers &&
      el.kind !== 'SPEAKER_LABEL' &&
      el.kind !== 'TRANSLATION' &&
      !(
        task === 'structural' &&
        (el.kind === 'CHAPTER' || el.kind === 'PASSAGE')
      );
    let referenceText = showNumber ? `[${el.reference}] ` : '';
    if (task === 'freeform') {
      referenceText = el.handle ? `${el.handle} ` : '';
    }
    let text = referenceTextForElement(el, elementIdToTranslation, options);
    // only decorate real text: `undefined + ':'` would yield the string "undefined:"
    if (text && el.kind === 'SPEAKER_LABEL') {
      text = text + ':';
    }
    if (el.kind === 'TRANSLATION') {
      lines.pop(); // discard the previous empty line
    }
    if (text) {
      lines.push(`${kindMarkdownPrefix[el.kind]}${referenceText}${text}`);
    }
    lines.push('');
  }
  return lines.join('\n') + '\n';
}

/**
 * Text to display for an element: its own text, or — when
 * `options.sub_translations` is set — the text of its translation.
 * Returns undefined when a translation is requested but missing or empty.
 */
export function referenceTextForElement(
  element: ScriptElement,
  elementIdToTranslation: IdToElement,
  options: ScriptOptions
) {
  if (!options.sub_translations) {
    return element.text;
  }
  const translatedText = elementIdToTranslation[element.id]?.text;
  return translatedText ? translatedText : undefined;
}

// TODO need to pass ref -> reference element map to lookup reference element for translations
// TODO how to unparse for a loose parser because the prompt could result in different syntaxes
/**
 * Rebuilds script text from parsed elements, one paragraph per element.
 *
 * UNRECOGNIZED elements are emitted verbatim; every other element is emitted
 * as its markdown prefix, an optional `[reference] ` tag, and its text.
 * Paragraphs are separated by a blank line and the result ends with a newline.
 *
 * @param numbers when false the `[reference] ` tag is omitted
 */
export function regenerateTextForElementsTranslationParser(
  elements: ScriptElement[],
  numbers: boolean = true
) {
  const paragraphs = elements.map(el => {
    if (el.kind === 'UNRECOGNIZED') {
      return el.text;
    }
    // TODO this is not going to work for translation kinds need to also find kind of translated element
    // to lookup correct prefix
    const numberTag = numbers ? `[${el.reference}] ` : '';
    return `${kindMarkdownPrefix[el.kind]}${numberTag}${el.text}`;
  });
  return paragraphs.join('\n\n') + '\n';
}

/**
 * Builds a lookup from reference number to masala element id. TRANSLATION
 * elements are ignored; when several elements share a reference the last one
 * in script order wins.
 */
export function getReferenceToMasalaIdMap(referenceScript: ScriptElement[]): {
  [index: string]: string;
} {
  return referenceScript.reduce<{ [index: string]: string }>((lookup, el) => {
    if (el.kind !== 'TRANSLATION') {
      lookup[el.reference] = el.id;
    }
    return lookup;
  }, {});
}

/**
 * Builds a lookup from reference number to the anchor of the masala element
 * carrying that reference. When several elements share a reference the last
 * one in script order wins.
 *
 * TRANSLATION elements are ignored, matching getReferenceToMasalaIdMap: they
 * carry the same reference as the element they translate but no anchor of
 * their own, so including them replaced the real anchor with undefined
 * whenever a translation followed its source element in the script.
 */
export function getReferenceToMasalaAnchorMap(
  referenceScript: ScriptElement[]
): {
  [index: string]: PointAnchor;
} {
  const result: { [index: string]: PointAnchor } = {};
  for (const el of referenceScript) {
    if (el.kind === 'TRANSLATION') {
      continue;
    }
    result[el.reference] = el.anchor;
  }
  return result;
}

// Model identifier sent as the `model` field of every request made by
// llmMakeOneRequest.
const openRouterLlmModel = 'openai/gpt-4-32k';

// HTTP headers attached to every request made by llmMakeOneRequest.
// SECURITY NOTE(review): the `Authorization` value below appears to be a real
// API key committed to source control. Anyone with read access to the
// repository (or to a bundle built from it) can use it. It should be revoked
// and loaded from deployment configuration or a server-side proxy instead.
// NOTE(review): `HTTP-Referer` / `X-Title` identify the caller as a
// third-party application ("typingmind") — confirm this is intentional.
const openRouterLlmHeaders = {
  'HTTP-Referer': 'http://typingmind.com',
  'X-Title': 'typingmind',
  'Content-Type': 'application/json',
  Authorization:
    'Bearer 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',
};

// Chat-completions endpoint that llmMakeOneRequest POSTs to.
const openRouterLlmEndpoint = 'https://openrouter.ai/api/v1/chat/completions';

/**
 * Sends a single user message (preceded by a fixed system message) to the
 * configured chat-completions endpoint and returns the text of the first
 * completion.
 *
 * @param request the user message content
 * @returns the `content` of the first choice's message
 * @throws Error when the endpoint answers with a non-2xx status, or when the
 *   response body holds no completion text (previously both cases surfaced
 *   as an opaque TypeError while reading `choices[0]`)
 */
export async function llmMakeOneRequest(request: string): Promise<string> {
  const messages = [
    { role: 'system', content: 'You are a helpful assistant.' },
    { role: 'user', content: request },
  ];

  const data = { model: openRouterLlmModel, messages };

  const response = await fetch(openRouterLlmEndpoint, {
    method: 'POST',
    headers: openRouterLlmHeaders,
    body: JSON.stringify(data),
  });
  // fetch() resolves for HTTP error statuses too, so check explicitly
  if (!response.ok) {
    throw Error(
      `LLM request failed: HTTP ${response.status} ${response.statusText}`
    );
  }

  const result: CreateChatCompletionResponse = await response.json();
  const content = result.choices?.[0]?.message?.content;
  if (typeof content !== 'string') {
    throw Error('LLM response contained no completion text');
  }
  return content;
}

// Default matcher set used to scan LLM response text.
// - referenceNumMatcher recognises a bracketed line number such as `[12]`.
// - lineMatchers recognise a line's kind from its leading marker; they are
//   tried in array order and the first match wins (see findMatch).
export const matchers: Matchers = {
  referenceNumMatcher: {
    referenceNumRegexString: '\\[\\d+\\]',
  },
  lineMatchers: [
    {
      // block-quote marker; no trailing space is required after `>`
      kind: 'TRANSLATION',
      beginLineRegexString: '>',
    },
    {
      kind: 'CHAPTER',
      beginLineRegexString: '# ',
    },
    {
      kind: 'PASSAGE',
      beginLineRegexString: '## ',
    },
  ],
};

// Compiled once at module load. compileMatchers mutates `matchers` in place
// and throws if it is called again on an already-compiled object.
compileMatchers(matchers);

/**
 * Tries each matcher's compiled begin-of-line regex against `line`, in array
 * order, and returns the first hit.
 *
 * @returns `[match, matcher]` for the first matcher that matches, otherwise
 *   `[null, null]`
 */
export function findMatch(
  line: string,
  matchers: ScriptLineMatcher[]
): [RegExpMatchArray, ScriptLineMatcher] {
  for (let position = 0; position < matchers.length; position++) {
    const candidate = matchers[position];
    const hit = line.match(candidate.beginLineCompiledRegex);
    if (hit) {
      return [hit, candidate];
    }
  }
  return [null, null];
}

/**
 * Line scanner for LLM response text: splits the text into non-blank lines
 * and tags each with a kind, an optional reference number and its text.
 */
export class LLMResponseScriptScanner {
  lineMatchers: ScriptLineMatcher[];
  referenceNumMatcher: ReferenceNumMatcher;

  /** @param matchers compiled matcher set (see compileMatchers) */
  constructor(matchers: Matchers) {
    const { lineMatchers, referenceNumMatcher } = matchers;
    this.lineMatchers = lineMatchers;
    this.referenceNumMatcher = referenceNumMatcher;
  }

  /**
   * Scans `text` line by line.
   *
   * For every trimmed, non-empty line:
   * 1. a leading line marker fixes the kind; one further nested marker is
   *    stripped from the text but does not change the kind;
   * 2. a leading reference number is extracted; a numbered line without a
   *    marker gets `defaultKind`;
   * 3. a line with neither marker nor number is 'UNRECOGNIZED' and keeps its
   *    whole text.
   *
   * `lineIndex` counts only the non-empty lines.
   */
  scan(
    text: string,
    defaultKind: ScriptElementKind = 'SENTENCE'
  ): ScriptLine[] {
    // TODO normalize text?
    const nonBlankLines = splitLines(text)
      .map(raw => raw.trim())
      .filter(trimmed => trimmed);

    return nonBlankLines.map((line, lineIndex) => {
      const scanned: ScriptLine = {} as any;
      let remainder = line;

      const [outerMatch, outerMatcher] = findMatch(line, this.lineMatchers);
      scanned.lineIndex = lineIndex;

      let markerMatch = outerMatch;
      if (outerMatch) {
        // the outermost marker decides the kind
        scanned.kind = outerMatcher.kind;
        const [nestedMatch] = findMatch(
          outerMatch.groups.text.trim(),
          this.lineMatchers
        );
        if (nestedMatch) {
          markerMatch = nestedMatch;
        }
      }
      if (markerMatch) {
        scanned.text = markerMatch.groups.text;
        remainder = scanned.text.trim();
      }

      const numberMatch = remainder.match(
        this.referenceNumMatcher.referenceNumCompiledRegex
      );
      if (numberMatch) {
        if (!scanned.kind) {
          scanned.kind = defaultKind;
        }
        scanned.referenceNumber = Number(numberMatch.groups.number);
        scanned.text = numberMatch.groups.text;
      }

      if (!scanned.kind) {
        scanned.kind = 'UNRECOGNIZED';
        scanned.text = remainder;
      }
      scanned.text = scanned.text.trim();
      return scanned;
    });
  }
}

/**
 * Converts scanned lines into script elements with origin 'PARSE'.
 *
 * Sentences and translations always receive the line's reference number
 * (possibly undefined); unrecognized lines receive none; every other kind
 * receives it only when the line carried one. Element keys are computed with
 * the translation-task key function.
 *
 * @param lines      output of LLMResponseScriptScanner.scan
 * @param timestamp0 timestamp stamped on every element; when 0 or omitted the
 *   current epoch second is used
 */
export function llmResponseParseScriptLines(
  lines: ScriptLine[],
  timestamp0: number = 0
): ScriptElement[] {
  const timestamp = timestamp0 || epochSeconds();
  const result: ScriptElement[] = [];
  for (const line of lines) {
    // TODO script element should be initialized in proper field order
    const resultElement: ScriptElement = {} as any;
    const kind = line.kind;
    resultElement.kind = kind;
    resultElement.origin = 'PARSE';
    resultElement.text = line.text;
    resultElement.timestamp = timestamp;
    switch (kind) {
      case 'SENTENCE':
      case 'TRANSLATION':
        resultElement.reference = line.referenceNumber;
        break;
      case 'UNRECOGNIZED':
        break;
      default:
        if (line.referenceNumber) {
          resultElement.reference = line.referenceNumber;
        }
      // TODO: an un-numbered structural line should inherit the reference of
      // the next SENTENCE line. That needs the sentence lines and their
      // lineIndex values plus a sorted search; the per-call scaffolding that
      // used to be computed for it was never read and has been removed.
    }
    // const hash = hashForElement(resultElement);
    // resultElement.id = hash;
    // resultElement.hash = hash;
    translationTaskComputeElementKeys(resultElement);
    result.push(resultElement);
  }
  return result;
}

/**
 * Parses an LLM translation response into script elements.
 *
 * The text is normalized (smart quotes flattened), scanned with the default
 * matchers and parsed; every recognized element is then re-labelled as a
 * TRANSLATION, its keys are recomputed and its id is set to its hash.
 * UNRECOGNIZED elements are returned unchanged.
 *
 * @param timestamp optional timestamp passed through to the line parser
 */
export function llmTranslationTaskParseResponseText(
  text: string,
  timestamp?: number
): ScriptElement[] {
  // need to flatten smart quotes
  const flattened = normalizeSpecialChars(text);
  const scannedLines = new LLMResponseScriptScanner(matchers).scan(
    flattened,
    'SENTENCE'
  );
  const parsed = llmResponseParseScriptLines(scannedLines, timestamp);

  parsed.forEach(el => {
    if (el.kind === 'UNRECOGNIZED') {
      return;
    }
    el.kind = 'TRANSLATION';
    translationTaskComputeElementKeys(el, true);
    // hack because compute elements does not overwrite ids
    el.id = el.hash;
  });
  return parsed;
}

/**
 * Parses an LLM vocab response into script elements.
 *
 * The text is normalized (smart quotes flattened), scanned with the default
 * matchers and parsed; every recognized element is then re-labelled as VOCAB.
 * Keys are computed, the id is set to the hash, and keys are computed a
 * second time because the id changed. UNRECOGNIZED elements are returned
 * unchanged.
 *
 * @param timestamp optional timestamp passed through to the line parser
 */
export function llmVocabTaskParseResponseText(
  text: string,
  timestamp?: number
): ScriptElement[] {
  // need to flatten smart quotes
  // TODO hacking think about best way to factor
  const flattened = normalizeSpecialChars(text);
  const scannedLines = new LLMResponseScriptScanner(matchers).scan(
    flattened,
    'SENTENCE'
  );
  const parsed = llmResponseParseScriptLines(scannedLines, timestamp);

  parsed.forEach(el => {
    if (el.kind === 'UNRECOGNIZED') {
      return;
    }
    el.kind = 'VOCAB';
    vocabTaskComputeElementKeys(el, true);
    // hack because compute elements does not overwrite ids
    el.id = el.hash;
    // TODO absurd hacking to deal with id changed
    vocabTaskComputeElementKeys(el, true);
  });
  return parsed;
}

/**
 * Writes LLM-produced translations for a unit into the masala translations
 * document (merged into the existing document) and posts a comment for every
 * flagged element.
 *
 * Elements without a reference number, or whose reference has no entry in
 * `referenceToId`, are logged and skipped. When `swapped` is set the unit
 * metadata is additionally marked with `structuralContentInL1: true`.
 *
 * @param noPersist when true nothing is written; the payload is only logged
 */
export async function importLlmTranslationIntoMasala({
  unitId,
  elements,
  locale,
  referenceToId,
  elementFlags,
  notes,
  noPersist = false, // for ad hoc testing
  swapped,
}: {
  unitId: string;
  elements: ScriptElement[];
  locale: string;
  referenceToId: { [index: string]: string };
  elementFlags?: FlagsData;
  notes?: NotesData;
  noPersist: boolean;
  swapped: boolean;
}): Promise<void> {
  const noteLookup = notes ?? {};
  const translations: ElementIdToTranslation = {} as any;
  const importedAt = epochSecondsFloat();
  const comments = new Map<string, string>();

  for (const el of elements) {
    const { reference } = el;
    if (!reference) {
      console.log('no reference number for translation: ' + el.text);
      continue;
    }
    const elementId = referenceToId[reference] as any;
    if (!elementId) {
      console.log('reference does not correspond to id: ' + reference);
      continue;
    }

    const translationId = getTranslationId(elementId, locale);
    const imported: Translation = {
      id: translationId,
      kind: 'TRANSLATION',
      elementId,
      locale,
      content: el.text.trim(),
      author: 'IMPORT',
      timestamp: importedAt,
    };
    // @jason: it appears that we need to index by the elementId, not the translationId, here
    translations[elementId] = imported;

    if (!elementFlags) {
      continue;
    }
    const flags = elementFlags[elementId];
    if (getMasalaFlag(flags) || getSamosaFlag(flags)) {
      // flagged elements carry their note (or a placeholder) over as a comment
      comments.set(translationId, noteLookup[elementId] ?? 'TODO FROM SAMOSA');
    }
  }

  const data: EpisodeTranslationDoc = {
    items: { [locale]: { translations } },
  };
  console.log(data);
  const paths = new DbPaths(db, unitId);
  const translationsDocRef = paths.translationsDocRef;

  if (noPersist) {
    // persisting is skipped because the response data may be a dummy used
    // only for its format, which would write wrong translations
    console.log('skipping persist');
    return;
  }

  if (swapped) {
    await paths.unitMetadataDocRef.set(
      { structuralContentInL1: true },
      { merge: true }
    );
  }
  await translationsDocRef.set(data, { merge: true });
  await importCommentsIntoMasala(unitId, comments, 'Samosa');
}

/**
 * Posts one comment per entry of `comments` to the unit's conversations.
 *
 * Does nothing when `comments` is empty. Otherwise a ConversationManager is
 * loaded in non-listening mode, the comments are posted once loading is
 * complete, and the manager is closed — also when waiting or posting throws,
 * so a failure no longer leaves the manager open.
 *
 * @param comments    comment text keyed by the id of the element commented on
 * @param attribution author label passed to each posted comment
 */
export async function importCommentsIntoMasala(
  unitId: string,
  comments: Map<string, string>,
  attribution: string
) {
  if (!comments.size) {
    return;
  }
  const manager = new ConversationManager({ episode: unitId });
  try {
    manager.listenMode = false;
    manager.load();
    await when(() => manager.status === loaderStatus.COMPLETE);
    for (const [elementId, text] of comments.entries()) {
      // NOTE(review): the result of postComment is not awaited; if it is
      // asynchronous, close() below may run before the writes settle — confirm.
      manager.postComment(elementId as any, attribution, text);
    }
  } finally {
    manager.close();
  }
}

/**
 * Writes LLM-produced structural elements (e.g. chapters, passages) for a
 * unit into the masala structural document and posts comments for flagged
 * content.
 *
 * - Elements without a reference number are logged and skipped.
 * - `*_SUMMARY` and `*_BREAK` elements are not imported, but they still
 *   contribute slot comments when flagged.
 * - Each imported element gets a fresh random id and the anchor registered
 *   for its reference.
 * - With `merge` false, existing structural content is replaced: only the
 *   existing PARAGRAPH items are carried over, and the structural versions
 *   document is rewritten with the same filter.
 *
 * A flagged element's note (or a placeholder) is recorded against each of its
 * slots; an imported element whose `claimedSlot` has such a note receives it
 * as a comment. Slot notes are collected in the same pass, so only notes of
 * elements earlier in `elements` are seen.
 *
 * `notes` is optional: a flagged element without a note (or with no notes
 * passed at all) falls back to the placeholder instead of throwing.
 */
export async function importLlmStructuralIntoMasala({
  importScript,
  // todo: figure out best way to overwrite w/o blowing away speaker labels
  elementFlags,
  notes,
  merge = true, // when false, will replace all existing structural content
}: {
  importScript: ImportScript;
  elementFlags?: FlagsData;
  notes?: NotesData;
  merge: boolean;
}): Promise<void> {
  const { unitId, elements, referenceToId, referenceToAnchor } = importScript;
  const timestamp = epochSecondsFloat();
  // items that survive a non-merge (replace) import
  const filter = (el: StorageStructural) => el.kind === 'PARAGRAPH';
  const items: { [index: string]: StorageStructural } = {};
  const slotComments: Map<string, string> = new Map();
  const comments: Map<string, string> = new Map();
  if (!merge) {
    const existingStructural = await loadStructuralDoc(unitId);
    for (const el of Object.values(existingStructural.items)) {
      if (filter(el)) {
        items[el.id] = el;
      }
    }
  }
  for (const el of elements) {
    if (!el.reference) {
      console.log('no reference number for element: ' + el.text);
      continue;
    }
    if (el.slots) {
      if (elementFlags) {
        const flags = elementFlags[el.id];
        const flagged = getMasalaFlag(flags) || getSamosaFlag(flags);
        if (flagged) {
          // `notes` is optional, so guard the lookup
          const note = notes?.[el.id] ?? 'TODO FROM SAMOSA';
          for (const slot of el.slots) {
            slotComments.set(slot, note);
          }
        }
      }
    }
    // don't import the summary elements and breaks
    if (el.kind.endsWith('_SUMMARY') || el.kind.endsWith('_BREAK')) {
      continue;
    }
    const elementId = referenceToId[el.reference] as any;
    if (!elementId) {
      console.error('reference does not correspond to id: ' + el.reference);
      continue;
    }
    const anchor = referenceToAnchor[el.reference];
    const id = `${el.kind}:${randomString(12)}` as ElementId;
    const item: StorageStructural = {
      id: id as any, // todo: properly type
      kind: el.kind as any, // @jason i couldn't figure out how to type this correctly 'CHAPTER' | 'PASSAGE',
      anchor,
      author: 'SAMOSA', // todo
      content: {
        text: el.text.trim(),
      },
      timestamp,
    };
    items[id] = item;
    const slotComment = slotComments.get(el.claimedSlot);
    if (slotComment) {
      comments.set(item.id, slotComment);
    }
  }
  const data: EpisodeStructuralDoc = {
    items,
  };
  console.log(data);
  const paths = new DbPaths(db, unitId);
  if (!merge) {
    const existingStructuralVersionsDoc = await loadStructuralVersionsDoc(
      unitId
    );
    const structuralVersionsUpdate =
      computeFilteredVersionsDoc<StructuralVersionData>(
        existingStructuralVersionsDoc,
        filter
      );
    const structuralVersionsDocRef = paths.structuralVersionsDocRef;
    await structuralVersionsDocRef.set(structuralVersionsUpdate);
  }

  const structuralDocRef = paths.structuralDocRef;
  await structuralDocRef.set(data, { merge });
  await importCommentsIntoMasala(unitId, comments, 'Samosa');
  if (elementFlags) {
    await importExtendedScopeCommentsIntoMasala(unitId, notes, elementFlags);
  }
}

// export const dummyLlmResponse = [
//   '[1] De Corinto para la capital de Guatemala, de Guatemala para Tecún Umán, en Tecún Umán descansas.',
//   '> [1] From Corinto to the capital of Guatemala, from Guatemala to Tecún Umán, in Tecún Umán you rest.',
//   '',
//   '[2] Luego brincas para México.',
//   '> [2] Then you hop over to Mexico.',
//   '',
//   '[3] Ahí es donde empezar el juego.',
//   "> [3] That's where the game begins.",
//   '',
//   '[4] Chiapas.',
//   '> [4] Chiapas.',
//   '',
//   '[5] De Chiapas para Veracruz...',
//   '> [5] From Chiapas to Veracruz...',
//   '',
//   '[6] Este es Carlos.',
//   '> [6] This is Carlos.',
//   '',
//   '[7] O bueno... llamémoslo “Carlos”.',
//   '> [7] Well... let\'s just call him "Carlos."',
//   '',
//   '[8] Y este periplo que describe es su ruta de trabajo.',
//   '> [8] And this journey he describes is his work route.',
//   '',
// ].join('\n');

// export async function llmTestTranslationTaskFunctions(
//   unitId: string,
//   { useDummyData }: { useDummyData: boolean }
// ) {
//   console.log('TESTING LLM FUNCS');
//   const [referenceScript, _] = await fetchReferenceScriptData(unitId);
//   console.log('referenceScript', referenceScript);
//   const simplePrompt =
//     'Translate each line of this Spanish-language transcript into English, and prepend each translated line with its respective line number and an angle bracket (>). Keep the original line, followed by the translated line.\n\n';
//   const locale = 'en';
//   const request =
//     simplePrompt + getLlmReferenceTextForTranslation(referenceScript, {});

//   console.log('llm request', request);

//   let response: string;
//   // disable actually LLM requests during debugging because Miedo cost 40 cents
//   // BE AWARE it took about 2 minutes for the result to return with Miedo which has 49 lines
//   if (useDummyData) {
//     console.log('using dummy response');
//     response = dummyLlmResponse;
//   } else {
//     console.log('awaiting llm response...');
//     response = await llmMakeOneRequest(request);
//     console.log('llm response', response);
//   }
//   const parsed = llmTranslationTaskParseResponseText(response);
//   console.log('parsed', parsed);
//   const translations = filterOnKind(parsed, 'TRANSLATION');
//   console.log('translations', translations);
//   const referenceToId = getReferenceToMasalaIdMap(referenceScript);
//   console.log('referenceToId', referenceToId);
//   await importLlmTranslationIntoMasala({
//     unitId,
//     elements: translations,
//     locale,
//     referenceToId,
//     noPersist: useDummyData,
//   });
// }

// Display order of element kinds that share a reference number: a kind's
// position in this array is its rank, and a lower rank sorts earlier.
// Used by sortScriptElements via `precedence.indexOf(kind)`; a kind missing
// from this list yields -1 there.
const precedence: ScriptElementKind[] = [
  'CHAPTER_BREAK',
  'CHAPTER',
  'CHAPTER_SUMMARY',
  'PASSAGE_BREAK',
  'PASSAGE',
  'PASSAGE_SUMMARY',
  'SPEAKER_LABEL',
  'SENTENCE',
  'TRANSLATION',
  'VOCAB',
];

// todo: this will need to change based on two-column vs single-column
/**
 * Returns a sorted copy of `elements`; the input array is left untouched.
 *
 * Elements are ordered by reference number first and by kind precedence
 * second. Both are packed into one integer key: the reference shifted left by
 * 9 bits, OR-ed with the kind's position in `precedence`. Elements with equal
 * keys keep their input order, because fastSortOnIntKey2 uses the original
 * index as tie-breaker.
 *
 * A kind that is not listed in `precedence` has position -1, which turns the
 * whole key into -1; fastSortOnIntKey2 stores keys as unsigned 32-bit values,
 * so such elements end up after all others regardless of their reference.
 *
 * (Whether an element originates from MASALA is no longer part of the key —
 * structural content now sits on the left in the two-column view.)
 */
export function sortScriptElements(elements: ScriptElement[]): ScriptElement[] {
  const sortKeyFor = (element: ScriptElement): number => {
    const kindRank: number = precedence.indexOf(element.kind);
    return (element.reference << 9) | kindRank;
  };

  const sorted = elements.slice();
  fastSortOnIntKey2(sorted, sortKeyFor);
  return sorted;
}

/**
 * Sorts `array` in place, ascending by the integer produced by `keyFunction`.
 *
 * Each element is packed into one 64-bit slot: its original position goes in
 * one 32-bit half and its key in the other, then the slots are sorted with the
 * typed array's native numeric sort. Keys are stored as unsigned 32-bit values,
 * so a negative key wraps around and sorts after the positive ones. Elements
 * with equal keys keep their original relative order because the original
 * position acts as the tie breaker.
 *
 * @param array the array to reorder in place
 * @param keyFunction maps an element to its integer sort key
 */
export function fastSortOnIntKey2(
  array: any[],
  keyFunction: (el: any) => number
) {
  const length = array.length;

  const packed = new BigUint64Array(length);
  // assuming little endian, which is true for all modern processors (ARM, x86, etc.):
  // the even 32-bit word is the low half of each 64-bit slot, the odd word the high half
  const halves = new Uint32Array(packed.buffer);

  // TODO optimize loops
  for (let position = 0, slot = 0; position < length; position++, slot += 2) {
    halves[slot] = position;
    halves[slot + 1] = keyFunction(array[position]);
  }

  packed.sort(); // native numeric sort

  // Collect the elements in their new order first, then write them back, so
  // that no element is overwritten before it has been read.
  const ordered = new Array(length);
  for (let rank = 0; rank < length; rank++) {
    ordered[rank] = array[halves[rank * 2]];
  }
  for (let rank = 0; rank < length; rank++) {
    array[rank] = ordered[rank];
  }
}

// // place holder
// const DEFAULT_TRANSLATION_PROMPT = `Translate each line of the below Spanish-language transcript into English, and prepend each translated line with its respective line number and an angle bracket (>). Keep the original line, followed by the translated line. Preserve all punctuation.

// Here's an example:
// Input:
// [1] Luego brincas para México.
// [2] Ahí es donde empezar el juego.

// Output:
// [1] Luego brincas para México.
// > [1] Then you jump to Mexico.

// [2] Ahí es donde empezar el juego.
// > [2] That's where the game begins.

// The transcript to translate begins now:`;

// export function defaultTranslationPrompt() {
//   return DEFAULT_TRANSLATION_PROMPT;
// }

// // we're going to need a better way to manage different canned prompts very soon
// export function defaultChapterStructurePrompt() {
//   return `The following is a transcript from a Spanish podcast episode of 30 minutes called “La búsqueda”.  Using this transcript, divide the content into about 8 equal length chapters and create Spanish chapter titles. Provide the number line ranges, so I can visualize how you break down the text into different sections.
// Avoid repeating names and words in the titles. Be creative.
// Format each line in the form of:
// Chapter 1: Memorias de infancia (Lines 1-53)

// Here is the script:`;
// }

// export function defaultStructuralPrompt() {
//   return `Below is a transcript from a Spanish podcast episode. Using this transcript, divide the content into about 8 equal length chapters and create Spanish chapter titles.
// Prefix each chapter title with '#' and include the line range in parentheses.
// Divide the chapters into subchapters that are approximately 200 to 500 words long and write a title for each subchapter. Prefix each subchapter title with a '##' prefix and include the line range in parentheses.

// Here is an example of the desired output format:
// # Memorias de infancia (1-53)
// ## Una advertencia para oyentes (1-3)
// ## La infancia de Mario Daniel Navarro (4-27)
// ## Un misterioso amigo de la familia (28-46)
// ## Otro asunto curioso para Mario (47-53)

// # Más que un vínculo de sangre (54-106)
// ## Pistas de que Mario fue adoptado (54-67)
// ## El papá de Mario se refería a la adopción indirectamente (68-75)
// ## La relación de Mario con su familia (76-84)
// ## La curiosidad de Mario aumenta (85-91)
// ## Mario teme despertar un trauma pasado (92-98)
// ## Una reacción inolvidable (99-106)

// Here is the transcript:
// `;
// }

/** The pieces of a vocab line of the form `section [canonical] = definition`. */
type ParsedVocabData = {
  /** The phrase as written before the `=`, with bracketed/parenthesised text removed. */
  section: string;
  /** The text found inside square brackets, or null when there is none. */
  canonical: string | null;
  /** Everything after the `=`, trimmed. */
  definition: string;
};

/**
 * Parses the text of a vocab element into its section, optional canonical form
 * and definition.
 *
 * @param el the vocab element whose `text` is parsed
 * @returns the parsed parts, or null when the text does not contain exactly
 *   one `=` separator (this includes missing or empty text)
 */
export function parseVocabElement(el: ScriptElement): ParsedVocabData | null {
  // Missing text is treated like empty text, which fails the separator check below.
  const text = el.text ?? '';
  const parts = text.split('=');
  if (parts.length !== 2) {
    return null;
  }

  let section = parts[0].trim();
  let canonical: string | null = null;

  // parse out square bracketed text as 'canonical'; the section is whatever
  // precedes the bracket
  const bracketed = section.match(/(.*)\[([^\]]+)\].*/);
  if (bracketed) {
    section = bracketed[1].trim();
    canonical = bracketed[2].trim();
  }

  // simply strip parenthesised text (todo: capture some how?)
  // The inner class excludes ')' (not ']', which was carried over from the
  // bracket pattern) so a ']' inside the parentheses no longer blocks the match.
  const parenthesised = section.match(/(.*)\(([^)]+)\).*/);
  if (parenthesised) {
    section = parenthesised[1].trim();
  }

  const definition = parts[1].trim();
  return { section, canonical, definition };
}

/**
 * Finds the first position in `a` at which the whole of `b` occurs as a
 * contiguous run, comparing items with strict equality.
 *
 * @param a the array to search in
 * @param b the run of items to look for
 * @returns the index in `a` where the first occurrence starts, or NO_INDEX
 *   when there is none; an empty `b` is found at index 0
 */
export function findMatchWithinIndex(a: any[], b: any[]) {
  const needleLength = b.length;
  // last start position that still leaves room for all of b
  const lastStart = a.length - needleLength;

  for (let start = 0; start <= lastStart; start++) {
    // advance through b for as long as the items line up
    let offset = 0;
    while (offset < needleLength && a[start + offset] === b[offset]) {
      offset++;
    }
    if (offset === needleLength) {
      return start;
    }
  }
  return NO_INDEX;
}

/**
 * Locates the words of `b` inside the words of `a` after both strings have
 * been run through stringToStrongNormalizedWordArray.
 *
 * @param a the text to search in
 * @param b the text to look for
 * @returns whatever findMatchWithinIndex reports for the two word arrays: the
 *   word index of the first occurrence, or NO_INDEX
 */
export function normalizedMatchWithinWordIndex(a: string, b: string) {
  // TODO deal with em dashes
  return findMatchWithinIndex(
    stringToStrongNormalizedWordArray(a),
    stringToStrongNormalizedWordArray(b)
  );
}

/**
 * Splits `s` on runs of whitespace, passes the pieces through
 * strongNormalizeWordArray and drops every entry that comes back falsy
 * (for example an empty string).
 *
 * @param s the text to break into normalized words
 * @returns the non-empty normalized words, in their original order
 */
export function stringToStrongNormalizedWordArray(s: string) {
  const rawWords = s.split(/\s+/);
  const normalizedWords = strongNormalizeWordArray(rawWords);
  return normalizedWords.filter(word => Boolean(word));
}

/**
 * Checks vocab elements against the sentences they refer to and reports the
 * ones that cannot be placed cleanly.
 *
 * A vocab whose text cannot be parsed by parseVocabElement is skipped without
 * an alert. For the others:
 * - RECONCILE is reported when the referenced sentence is missing from
 *   `sentenceLookup`, when the normalized vocab section is empty, or when its
 *   words cannot be found as a contiguous run in the sentence's words;
 * - OVERLAP is reported when the matched word range shares a word with the
 *   range of a vocab processed earlier for the same sentence.
 *
 * @param vocabs the vocab elements to check
 * @param sentenceLookup sentence elements keyed by reference number
 * @returns the list of alerts, or null when there is nothing to report
 */
export function lintVocab(
  vocabs: ScriptElement[],
  sentenceLookup: { [index: number]: ScriptElement }
) {
  // TODO move to correct file not use project types from llm funcs
  const result: LintAlert[] = [];
  const claimedAddresses = new Set();

  const reconcileAlert = (
    vocab: ScriptElement,
    message: string
  ): LintAlert => ({
    kind: 'RECONCILE',
    message,
    key: `RECONCILE:${vocab.hash}`,
    elementId: vocab.id,
    reference: vocab.reference,
    level: 'WARNING',
  });

  for (const vocab of vocabs) {
    const parsed = parseVocabElement(vocab);
    if (!parsed) {
      continue;
    }

    // A vocab with a missing or unknown reference has no sentence to match
    // against; report it instead of failing on the undefined lookup result.
    const sentence = sentenceLookup[vocab.reference];
    if (!sentence) {
      result.push(
        reconcileAlert(vocab, `cannot find sentence for vocab reference`)
      );
      continue;
    }

    const aParts = stringToStrongNormalizedWordArray(sentence.text);
    const bParts = stringToStrongNormalizedWordArray(parsed.section);
    // An empty word list would trivially "match" at index 0, so it is treated
    // as not found.
    const index = bParts.length
      ? findMatchWithinIndex(aParts, bParts)
      : NO_INDEX;
    if (index === NO_INDEX) {
      result.push(
        reconcileAlert(vocab, `cannot find match for vocab section in sentence`)
      );
      continue;
    }

    const begin = index;
    const end = begin + bParts.length - 1;
    const indexRange = { begin, end };
    // word positions are relative to the sentence, so claimed positions are
    // namespaced with the sentence id
    const prefix = sentence.id;
    if (rangeIntersectsSet(indexRange, claimedAddresses, prefix)) {
      result.push({
        kind: 'OVERLAP',
        message: `vocab intersects with other vocab`,
        key: `OVERLAP:${vocab.hash}`,
        elementId: vocab.id,
        reference: vocab.reference,
        level: 'WARNING',
      });
    }
    addRangeToSet(indexRange, claimedAddresses, prefix);
  }
  return result.length ? result : null;
}

/**
 * Tells whether any index of the inclusive `range` is already present in `set`.
 *
 * @param range inclusive begin/end indexes to probe
 * @param set the set of claimed entries
 * @param prefix when truthy, each index is looked up as `prefix + index`
 *   instead of the bare index
 * @returns true as soon as one entry of the range is found in the set,
 *   false otherwise (including for a range whose end precedes its begin)
 */
export function rangeIntersectsSet(
  range: IndexRange,
  set: Set<any>,
  prefix: any = null
) {
  const { begin, end } = range;
  let cursor = begin;
  while (cursor <= end) {
    const entry = prefix ? prefix + cursor : cursor;
    if (set.has(entry)) {
      return true;
    }
    cursor++;
  }
  return false;
}

/**
 * Adds every index of the inclusive `range` to `set`.
 *
 * @param range inclusive begin/end indexes to add
 * @param set the set that receives the entries (mutated in place)
 * @param prefix when truthy, each index is stored as `prefix + index`
 *   instead of the bare index
 */
export function addRangeToSet(
  range: IndexRange,
  set: Set<any>,
  prefix: any = null
) {
  const { begin, end } = range;
  let cursor = begin;
  while (cursor <= end) {
    set.add(prefix ? prefix + cursor : cursor);
    cursor++;
  }
}

/**
 * Imports LLM-produced vocab elements of a unit as VOCAB word groups, stores
 * their definitions as translations for `locale`, and turns the notes of
 * flagged elements into comments.
 *
 * For every element of `importScript.elements` the function:
 * 1. parses the text with parseVocabElement and resolves the sentence through
 *    `importScript.referenceToId`;
 * 2. locates the normalized vocab words inside the normalized sentence words;
 * 3. skips the vocab when it has no reference, cannot be parsed, has no
 *    sentence, has no (non-empty) match, overlaps a vocab accepted earlier in
 *    this run, or intersects a kept existing word group;
 * 4. otherwise creates a word group anchored on the matched words plus a
 *    translation carrying the definition.
 *
 * Nothing is written when no vocab is accepted.
 *
 * @param importScript unit id, vocab elements and reference-to-id map
 * @param locale locale under which the definitions are stored
 * @param elementFlags optional per-element flags; a flagged vocab gets a comment
 * @param notes optional per-element note text used for those comments
 * @param merge defaults to true. When false, existing word groups with
 *   subKind 'VOCAB' are ignored by the intersection check and zorchWordGroups
 *   is called with the same predicate before writing (presumably to remove
 *   the existing vocab groups; confirm against zorchWordGroups).
 */
export async function importLlmVocabIntoMasala({
  importScript,
  locale,
  elementFlags,
  notes,
  merge = true,
}: {
  importScript: ImportScript;
  locale: string;
  elementFlags?: FlagsData;
  notes?: NotesData;
  merge?: boolean;
}) {
  const { unitId, elements, referenceToId } = importScript;
  const timestamp = epochSecondsFloat();
  const episodeData = await loadScriptEpisodeData(unitId);
  const allContent = basicScriptElementsFromEpisodeData(episodeData);
  const sentences = allContent.filterByKinds(['SENTENCE']);
  const words = allContent.words;
  const filter = (wg: StorageWordGroup) => wg.subKind !== 'VOCAB';
  const wordGroups = allContent.filterByKind('WORD_GROUP');
  const keptWordGroups = merge ? wordGroups : wordGroups.filter(filter);
  // word indexes already taken by a vocab accepted earlier in this run
  const claimedAddresses = new Set<number>();
  const containsClaimed = (range: IndexRange) =>
    rangeIntersectsSet(range, claimedAddresses);
  const addToClaimed = (range: IndexRange) =>
    addRangeToSet(range, claimedAddresses);

  const outputMasalaVocabs: StorageWordGroup[] = [];
  const definitionTranslations: StorageTranslation[] = [];
  const comments: Map<string, string> = new Map();
  for (const vocab of elements) {
    if (!vocab.reference) {
      console.log('no reference number for vocab: ' + vocab.text);
      continue;
    }
    const parsed = parseVocabElement(vocab);
    if (!parsed) {
      continue;
    }
    const ref = vocab.reference;
    const sentenceId = referenceToId[ref];
    const sentence = sentences.getElement(sentenceId as any);
    if (!sentence) {
      console.error(`missing sentence for vocab: ${ref}`);
      continue;
    }
    const sentenceText = getElementEditableContentString(sentence, words);
    const aParts = stringToStrongNormalizedWordArray(sentenceText);
    const bParts = stringToStrongNormalizedWordArray(parsed.section);
    const index = findMatchWithinIndex(aParts, bParts);

    // An empty word list "matches" at index 0 and would yield an inverted word
    // range (end before begin), so it is handled like a failed match.
    if (bParts.length === 0 || index === NO_INDEX) {
      console.log('cannot find match for vocab');
      console.log(ref);
      console.log(sentenceText);
      console.log(
        `${parsed.section} [${parsed.canonical}] = ${parsed.definition}`
      );
      continue;
    }
    console.log('***********');
    console.log(sentenceText);
    console.log(
      `${parsed.section} [${parsed.canonical}] = ${parsed.definition}`
    );
    console.log(index);

    // the match index is relative to the sentence; offset it by the sentence address
    const begin = sentence.address + index;
    const end = begin + bParts.length - 1;
    const indexRange = { begin, end };
    if (containsClaimed(indexRange)) {
      // do not allow creating overlapping word groups
      continue;
    }
    addToClaimed(indexRange);
    const idRange = words.indexRangeToIdRange(indexRange);
    console.log(idRange);
    const intersecting =
      keptWordGroups.getElementsStartWithinWordIndexRange(indexRange);
    if (intersecting) {
      console.log(intersecting);
      console.log('vocab intersects with existing word group');
      console.log(ref);
      console.log(sentenceText);
      console.log(
        `${parsed.section} [${parsed.canonical}] = ${parsed.definition}`
      );
      continue;
    }
    // any upper-case character in the section marks the group as case preserving
    const preserveCase = parsed.section !== parsed.section.toLowerCase();
    const vocabWordGroup: StorageWordGroup = {
      id: `WORD_GROUP:${randomString(12)}`,
      kind: 'WORD_GROUP',
      subKind: 'VOCAB',
      content: {
        preserveCase,
        duplicateOk: false,
      },
      anchor: { wordId: idRange.begin, endWordId: idRange.end },
      author: 'SAMOSA',
      timestamp,
    };
    if (parsed.canonical) {
      vocabWordGroup.content.canonical = parsed.canonical;
    }
    console.log(vocabWordGroup);
    if (elementFlags) {
      const flags = elementFlags[vocab.id];
      const flagged = getMasalaFlag(flags) || getSamosaFlag(flags);
      if (flagged) {
        // `notes` is optional, so it may be absent even when flags are given
        const note = notes?.[vocab.id] ?? 'TODO FROM SAMOSA';
        comments.set(vocabWordGroup.id, note);
      }
    }

    outputMasalaVocabs.push(vocabWordGroup);
    const definition: StorageTranslation = {
      id: getTranslationId(vocabWordGroup.id, locale),
      kind: 'TRANSLATION',
      elementId: vocabWordGroup.id,
      locale,
      content: {
        note: parsed.definition,
      },
      author: 'SAMOSA',
      timestamp,
    };
    definitionTranslations.push(definition);
  }
  if (!outputMasalaVocabs.length) {
    return;
  }
  const wordGroupItems: { [index: string]: StorageWordGroup } = {};
  for (const vocab of outputMasalaVocabs) {
    wordGroupItems[vocab.id] = vocab;
  }
  const wordGroupUpdate: EpisodeWordGroupDoc = {
    items: wordGroupItems,
  };
  const dbPaths = new DbPaths(db, unitId);
  const wordGroupDocRef = dbPaths.wordGroupsDocRef;
  console.log(wordGroupUpdate);
  if (!merge) {
    await zorchWordGroups(unitId, filter);
  }
  await wordGroupDocRef.set(wordGroupUpdate, { merge: true });
  const translations: ElementIdToTranslation = {};
  for (const translation of definitionTranslations) {
    translations[translation.elementId] = translation;
  }
  const translationUpdate: EpisodeTranslationDoc = {
    items: { [locale]: { translations } },
  };
  const translationDocRef = dbPaths.translationsDocRef;
  console.log(translationUpdate);
  await translationDocRef.set(translationUpdate, { merge: true });
  await importCommentsIntoMasala(unitId, comments, 'Samosa');
  if (elementFlags && notes) {
    // rework conditionals on elementFlags
    // extended-scope comments are built from the notes, so both are required
    await importExtendedScopeCommentsIntoMasala(unitId, notes, elementFlags);
  }
}

/**
 * Imports notes as comments for elements that carry a Masala flag.
 *
 * Only notes with non-empty text whose key contains a ':' are considered, and
 * of those only the ones for which getMasalaFlag reports a flag. The collected
 * comments are handed to importCommentsIntoMasala under the 'Samosa' label.
 *
 * @param unitId the unit the comments belong to
 * @param notes note text keyed by element id; a missing value is treated as
 *   having no notes
 * @param elementFlags flags keyed by element id
 */
async function importExtendedScopeCommentsIntoMasala(
  unitId: string,
  notes: NotesData,
  elementFlags: FlagsData
) {
  const comments: Map<string, string> = new Map();
  // The public importer declares `notes` as optional, so guard against it
  // being absent rather than letting Object.entries throw.
  const noteEntries = notes ? Object.entries(notes) : [];
  for (const [elementId, text] of noteEntries) {
    // cheat test for Masala reference id
    if (!text || !elementId.includes(':')) {
      continue;
    }
    const flags = elementFlags?.[elementId];
    if (getMasalaFlag(flags)) {
      comments.set(elementId, text);
    }
  }
  await importCommentsIntoMasala(unitId, comments, 'Samosa');
}
