import {
  AudioConfig,
  CancellationReason,
  OutputFormat,
  SpeechConfig,
  PhraseListGrammar,
  ProfanityOption,
  ResultReason,
  SpeechRecognizer,
} from 'microsoft-cognitiveservices-speech-sdk';
import { Logger } from '~/logic/Logger/Logger';

import { Clock } from '../Clock';
import { RecognizedResult } from './RecognizedResult';
import { RecognizingResult } from './RecognizingResult';
import { ResultsStabilityBuffer } from './ResultsStabilityBuffer';
import { TimestampInMs } from '../_types';
import { CaptionFormatBuffer } from '../CaptionFormatBuffer/CaptionFormatBuffer';

/**
 * Spoken command phrases that are always added to the recognizer's phrase
 * list, ahead of the configured word list.
 *
 * NOTE(review): presumably these are dictated stand-ins for punctuation marks
 * that are converted further downstream — confirm against the formatting code.
 */
const customPunctuationCommands: string[] = [
  'bam bam',
  'comma',
  'peerk',
  'peermak',
  'kyumak',
  'quex',
  'sklam',
];

/**
 * The config for the Microsoft speech recognizer.
 *
 * Required settings are listed first, optional ones after.
 */
interface RecognizerConfig {
  /** The key for the speech config */
  msCognitiveServiceKey: string;

  /** The word list for the Microsoft speech recognizer */
  wordlist: string[];

  /**
   * The caption format buffer. It is handed to every results buffer the
   * recognizer creates. This field is required: the type does not allow
   * omitting it.
   */
  captionFormatBuffer: CaptionFormatBuffer;

  /** A callback to set the microsoftSessionId in the CaptioningInterface */
  setMicrosoftSessionId: (microsoftSessionId: string | null) => void;

  /**
   * The endpoint ID for the speech config. When it is omitted the recognizer
   * logs a warning and leaves the endpoint ID unset.
   */
  msCognitiveServiceEndpointId?: string;

  /** A callback to disable the CaptioningInterface's input */
  setCaptionInputDisabled?: (isDisabled: boolean) => void;

  /** A callback to set the speech engine connected state */
  setSpeechEngineConnected?: (state: boolean) => void;
}

/**
 * A logger function for amplitude.
 *
 * It is called with a message and, optionally, one extra value carrying
 * details about the event (for example `{ errors: event }`).
 *
 * NOTE(review): `options` is deliberately left as `any`; narrowing it would
 * change which logger implementations are assignable to this type.
 *
 * @deprecated Will be replaced by the logger instance
 */
type AmplitudeLogger = (message: string, options?: any) => void;

/**
 * One entry of the `NBest` list in the recognized result's JSON payload.
 *
 * Every field is optional on purpose, so that code reading the payload has to
 * handle missing data explicitly.
 */
interface NBestResult {
  /** The text used as the final text of a recognized result */
  ITN?: string;
  /** The confidence value attached to a recognized result */
  Confidence?: number;
}

/**
 * The shape of the JSON payload attached to a recognized result.
 *
 * Every field is optional on purpose, so that code reading the payload has to
 * handle missing data explicitly.
 */
interface RecognizedResultJsonData {
  /** Candidate results; the first entry is the one the recognizer reads */
  NBest?: NBestResult[];
  /** The status string reported in the payload */
  RecognitionStatus?: string;
}

// All of this logic came from the original swatei/CaptioningInterface
/**
 * Class to interface with the Microsoft speech recognizer.
 *
 * It runs a continuous-recognition `SpeechRecognizer` on the default
 * microphone input, feeds interim and final results into a
 * `ResultsStabilityBuffer`, and reports session and connection state through
 * the callbacks supplied in the config.
 */
export class Recognizer {
  /** An amplitudeLogger function */
  private _amplitudeLogger: AmplitudeLogger;
  /** The config for the Microsoft speech recognizer */
  private _config: RecognizerConfig;
  /** The current SpeechConfig for the recognizer client */
  private _speechConfig: SpeechConfig;
  /** The current AudioConfig for the recognizer client */
  private _audioConfig: AudioConfig;
  /** A logger instance */
  private _logger: Logger;
  /** The current results buffer; created on demand, dropped after each recognized result */
  private _resultsBuffer: ResultsStabilityBuffer | undefined;
  /** The current MS SpeechRecognizer client */
  private _recognizer: SpeechRecognizer | undefined;
  /** Whether the current recognizer client has already been closed by `stop()` */
  private _closed = false;

  /**
   * Create a Microsoft speech recognizer.
   *
   * Only configuration is prepared here; no recognizer client exists until
   * `start()` is called.
   *
   * @param config The recognizer config
   * @param amplitudeLogger Optional amplitude logger; defaults to a no-op
   */
  constructor(
    config: RecognizerConfig,
    // eslint-disable-next-line @typescript-eslint/no-empty-function
    amplitudeLogger: AmplitudeLogger = () => {}
  ) {
    this._config = config;
    this._amplitudeLogger = amplitudeLogger;
    this._logger = Logger.getInstance();

    // Set up speech config
    this._speechConfig = SpeechConfig.fromSubscription(
      this._config.msCognitiveServiceKey,
      'eastus'
    );
    this._speechConfig.outputFormat = OutputFormat.Detailed;
    this._speechConfig.speechRecognitionLanguage = 'en-US';
    this._speechConfig.setProfanity(ProfanityOption.Raw);
    this._speechConfig.enableAudioLogging();
    if (this._config.msCognitiveServiceEndpointId) {
      this._speechConfig.endpointId = this._config.msCognitiveServiceEndpointId;
    } else {
      this._logger.warn({ message: 'SpeechRecognizer custom endpoint ID not provided' });
    }

    // Set up audio config
    this._audioConfig = AudioConfig.fromDefaultMicrophoneInput();
  }

  /**
   * - Closes a recognizer client left open by an earlier `start()`
   * - Creates a new speech recognizer client
   * - Primes the speech recognizer
   * - Sets up the speech recognizer callbacks
   * - Starts the speech recognizer
   */
  start(): void {
    // Without this, calling start() twice would overwrite the reference to a
    // client that is still open and leave it running. No-op when nothing is open.
    this.stop();
    this._closed = false;
    /** The timestamp of when the SpeechRecognizer client was initialized */
    const startTimestamp = new Clock().now();
    const recognizer = new SpeechRecognizer(this._speechConfig, this._audioConfig);
    this._recognizer = recognizer;
    this._prime(recognizer);
    this._amplitudeLogger('Microsoft Speech Engine Initialized');
    recognizer.recognizing = this._createRecognizingCallback(startTimestamp);
    recognizer.recognized = this._createRecognizedCallback(startTimestamp);
    recognizer.sessionStarted = this._createSessionStartedCallback();
    recognizer.canceled = this._createCanceledCallback(recognizer);
    recognizer.sessionStopped = this._createSessionStoppedCallback(recognizer);
    // Pass an error callback so a failed start is logged instead of being dropped.
    recognizer.startContinuousRecognitionAsync(undefined, (error: string) => {
      this._logger.error({
        message: 'SpeechRecognizer failed to start continuous recognition',
        info: { error },
      });
    });
    void this._logAudioDevices();
  }

  /**
   * - Closes the speech recognizer if it exists and has not been closed yet
   *
   * NOTE(review): a results buffer opened by an interim result is not cleared
   * here, so caption input stays disabled until the next recognized result —
   * confirm whether stopping should re-enable it.
   */
  stop(): void {
    if (this._recognizer && !this._closed) {
      this._recognizer.close();
      this._closed = true;
    }
  }

  /** Disables caption input and returns a fresh results buffer. */
  private createNewResultsBuffer() {
    // Disable caption input
    this._config.setCaptionInputDisabled?.(true);
    return new ResultsStabilityBuffer({
      captionFormatBuffer: this._config.captionFormatBuffer,
    });
  }

  /** Re-enables caption input and drops the current results buffer. */
  private clearResultsBuffer() {
    // Enable caption input
    this._config.setCaptionInputDisabled?.(false);
    this._resultsBuffer = undefined;
  }

  /**
   * The current results buffer. Reading it creates the buffer (and disables
   * caption input) when none exists yet.
   */
  private get resultsBuffer() {
    if (!this._resultsBuffer) {
      this._resultsBuffer = this.createNewResultsBuffer();
    }
    return this._resultsBuffer;
  }

  /**
   * Adds the custom punctuation commands followed by the configured word list
   * to the recognizer's phrase list.
   */
  private _prime(recognizer: SpeechRecognizer) {
    const phraseList = PhraseListGrammar.fromRecognizer(recognizer);
    const phraseListToPrime = [...customPunctuationCommands, ...this._config.wordlist];
    phraseList.addPhrases(phraseListToPrime);
    this._amplitudeLogger('Microsoft Engine - custom phrase list initialized');
  }

  /**
   * Builds the handler for interim ("recognizing") results: each one is
   * wrapped in a `RecognizingResult` and added to the results buffer.
   * Errors are logged and never thrown back into the SDK.
   */
  private _createRecognizingCallback(
    startTimestamp: TimestampInMs
  ): SpeechRecognizer['recognizing'] {
    return (_sender, event) => {
      try {
        const result = new RecognizingResult(startTimestamp, {
          resultId: event.result.resultId,
          offset: event.result.offset,
          duration: event.result.duration,
          text: event.result.text,
        });
        this._logger.info({ message: 'SpeechRecognizer recognizing', info: { event, result } });
        this.resultsBuffer.addResult(result);
      } catch (error) {
        this._logger.error({ message: this._getError(error), info: { event } });
      }
    };
  }

  /**
   * Builds the handler for final ("recognized") results. Results whose reason
   * is not `RecognizedSpeech`, or that carry no text, are skipped. Otherwise
   * the result is added to the results buffer and the buffer is then cleared.
   * Errors are logged and never thrown back into the SDK.
   */
  private _createRecognizedCallback(startTimestamp: TimestampInMs): SpeechRecognizer['recognized'] {
    return (_sender, event) => {
      try {
        // Check the reason first so non-speech results are not parsed at all.
        if (event.result.reason !== ResultReason.RecognizedSpeech) {
          return;
        }
        const resultJson = JSON.parse(event.result.json) as RecognizedResultJsonData;
        const bestResult = resultJson?.NBest?.[0];
        const resultText = bestResult?.ITN;
        // TODO: Investigate whether skipping this recognized result could result in
        // the buffer getting messed up. In other words, is it possible that we received
        // text in a recognizing event that preceded this, but somehow no text in the
        // corresponding recognized result.
        if (!resultText) {
          this._amplitudeLogger(
            `Received empty text from MSFT Recognized event: ${event.result.json}`
          );
          return;
        }
        const result = new RecognizedResult(startTimestamp, {
          resultId: event.result.resultId,
          offset: event.result.offset,
          duration: event.result.duration,
          text: resultText,
          confidence: bestResult?.Confidence,
        });
        this._logger.info({ message: 'SpeechRecognizer recognized', info: { event, result } });
        this.resultsBuffer.addResult(result);
        this.clearResultsBuffer();
      } catch (error) {
        this._logger.error({ message: this._getError(error), info: { event } });
      }
    };
  }

  /**
   * Builds the handler for the session-started event: marks the speech engine
   * as connected and publishes the session ID. An event without a session ID
   * is logged and otherwise ignored.
   */
  private _createSessionStartedCallback(): SpeechRecognizer['sessionStarted'] {
    return (_sender, event) => {
      try {
        this._logger.info({
          message: 'SpeechRecognizer sessionStarted',
          info: { event },
        });
        const { sessionId } = event;
        if (!sessionId) {
          this._logger.warn({
            message: 'SpeechRecognizer sessionStarted missing sessionId',
            info: { event },
          });
          this._amplitudeLogger('Microsoft Speech Recognizer session did not start.', {
            errors: event,
          });
          return;
        }
        this._config.setSpeechEngineConnected?.(true);
        this._config.setMicrosoftSessionId(sessionId);
      } catch (error) {
        this._logger.error({ message: this._getError(error), info: { event } });
      }
    };
  }

  /**
   * Builds the handler for the canceled event: marks the speech engine as
   * disconnected, clears the session ID and stops continuous recognition.
   * When the cancellation is a websocket error, the recognizer is closed and
   * started again.
   *
   * @param recognizer The client this handler is attached to. Events that
   *   arrive after this client has been replaced are logged and ignored, so
   *   they cannot tear down its replacement.
   */
  private _createCanceledCallback(recognizer: SpeechRecognizer): SpeechRecognizer['canceled'] {
    return (_sender, event) => {
      try {
        this._logger.info({
          message: 'SpeechRecognizer canceled',
          info: { event },
        });
        // A restart may already have swapped in a new client; leave it alone.
        if (recognizer !== this._recognizer) {
          return;
        }
        this._config.setSpeechEngineConnected?.(false);
        this._config.setMicrosoftSessionId(null);
        recognizer.stopContinuousRecognitionAsync();
        this._amplitudeLogger('Microsoft Speech Recognizer Canceled', { errors: event });

        if (event.reason === CancellationReason.Error) {
          this._logger.warn({
            message: 'SpeechRecognizer canceled due to error',
            info: { event },
          });

          // HACK: In case of a generic websocket error, restart continuous recognition mode
          if (event.errorDetails.match(/(websocket error code)/)) {
            this.stop();
            this.start();
            this._amplitudeLogger('Microsoft Speech Recognition engine re-initializing');
          }
        }
      } catch (error) {
        this._logger.error({ message: this._getError(error), info: { event } });
      }
    };
  }

  /**
   * Builds the handler for the session-stopped event: marks the speech engine
   * as disconnected, clears the session ID and closes the recognizer.
   *
   * @param recognizer The client this handler is attached to. Events that
   *   arrive after this client has been replaced are logged and ignored, so
   *   they cannot close its replacement.
   */
  private _createSessionStoppedCallback(
    recognizer: SpeechRecognizer
  ): SpeechRecognizer['sessionStopped'] {
    return (_sender, event) => {
      try {
        this._logger.info({
          message: 'SpeechRecognizer sessionStopped',
          info: { event },
        });
        // A restart may already have swapped in a new client; leave it alone.
        if (recognizer !== this._recognizer) {
          return;
        }
        this._config.setSpeechEngineConnected?.(false);
        this._config.setMicrosoftSessionId(null);
        this.stop();
        this._amplitudeLogger('Microsoft Speech Recognizer stopped');
      } catch (error) {
        this._logger.error({ message: this._getError(error), info: { event } });
      }
    };
  }

  /**
   * Looks up the audio input and output devices to report as defaults.
   *
   * Best-effort: any failure while enumerating devices yields `null` for both
   * instead of throwing.
   */
  private async getDefaultAudioDevices() {
    try {
      const devices = await navigator.mediaDevices.enumerateDevices();
      const audioInputs = devices.filter((device) => device.kind === 'audioinput');
      const audioOutputs = devices.filter((device) => device.kind === 'audiooutput');

      // Assuming the first audio input and output device are the defaults
      const defaultAudioInput = audioInputs[0] ?? null;
      const defaultAudioOutput = audioOutputs[0] ?? null;

      return {
        defaultAudioInput,
        defaultAudioOutput,
      };
    } catch {
      return {
        defaultAudioInput: null,
        defaultAudioOutput: null,
      };
    }
  }

  /** Returns `error` unchanged when it is an `Error`, otherwise its string form. */
  private _getError(error: unknown) {
    if (error instanceof Error) return error;
    return String(error);
  }

  /** Logs the labels of the audio devices reported as defaults. */
  private async _logAudioDevices(): Promise<void> {
    try {
      const { defaultAudioInput, defaultAudioOutput } = await this.getDefaultAudioDevices();
      this._logger.info({
        message: 'Audio devices',
        info: {
          defaultAudioInput: defaultAudioInput?.label,
          defaultAudioOutput: defaultAudioOutput?.label,
        },
      });
    } catch (error) {
      this._logger.error({ message: 'Error fetching audio devices', info: { error } });
    }
  }
}
