    mmir-plugin-speech-io

    Plugin for the MMIR framework that adds state-machines for managing speech input/output states

    NOTE currently this plugin requires a webpack build process (see mmir-webpack).

    Configuration

    For including the plugin in the mmir webpack build:

    //...
    const mmirAppConfig = {
      includePlugins: [
        {id: 'mmir-plugin-speech-io', config: {
          //optional configuration for the plugin:
          alternativeResults: 5,
          longPause: true,
          command: {
            languageModel: 'dictation',
            alternativeResults: 2
          }
        }}
      ],
      //...
    };
    
    const webpack = require('webpack');
    module.exports = function(webpackConfig, _options){
      try{
        require('mmir-webpack')(webpack, webpackConfig, mmirAppConfig);
      } catch(err){
        console.log(err);
        throw err;
      }
      return webpackConfig;
    }
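
    The example above follows the signature of a custom webpack configuration function (e.g. as used by custom-webpack builders that pass in the webpackConfig). In a plain webpack project, the same mmir-webpack call can be applied directly to the configuration object; a minimal sketch (the entry point and other settings are placeholders, mmirAppConfig as defined above):

    const webpack = require('webpack');
    const mmirWebpack = require('mmir-webpack');

    const config = {
      entry: './src/index.js',
      //... the usual webpack settings
    };

    // mmir-webpack modifies the passed-in webpack configuration
    // (as in the example above, where webpackConfig is returned after the call)
    mmirWebpack(webpack, config, mmirAppConfig);
    module.exports = config;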

    Configuration values:

      /**
       * disable stopping TTS output when starting ASR (microphone) input?
       *
       * (by default: do stop TTS before starting ASR/microphone input)
       *
       * @default false
       */
      disableCancelPrompt?: boolean;
      /**
       * maximum number of alternative ASR results
       * @default 1
       */
      alternativeResults?: number;
      /**
       * speech mode / language model that should be used for ASR
       *
       * @default speechMode === 'dictation'? 'dictation' : 'search'
       */
      languageModel?: 'dictation' | 'search';
      /**
       * EOS (End Of Speech) detection pause:
       * require a long pause (instead of only a short pause) to detect end-of-speech?
       *
       * @default false
       */
      longPause?: boolean;
    
      /**
       * disable improved ASR feedback (see documentation of {@link mmir.MediaManager.startRecord})
       *
       * (NOTE: will be ignored if not supported by active ASR engine)
       *
       * @default speechInputMode === 'command'
       */
      disableImprovedFeedback?: boolean;
    
      /**
       * enable/disable receiving interim ASR results depending on speech mode
       *
       * @default speechInputMode === 'dictation'
       */
      enableInterimResults?: boolean
    
      /**
       * Flag that indicates if end-of-speech (EOS) detection will be used
       * for speech recognition.
       *
       * If enabled, the recognition will be stopped after EOS was detected
       * (e.g. upon a pause after dictating a sentence).
       *
       * @default false
       */
      eos?: boolean;
    
      /**
       * specific configuration values for the active speech-mode ('dictation' or 'command'):
       * override general configuration values for the plugin (see configuration options above)
       */
      dictation?: {...};
      command?: {...};
    
      /**
       * The execution context/name (see {@link mmir.MediaManager.setDefaultCtx}) for the
       * recognition functions (i.e. `mmir.media.recognize()` | `mmir.media.startRecord()` | `mmir.media.stopRecord()` | `mmir.media.cancelRecognition()`).
       *
       * @default undefined (i.e. use default context)
       */
      asrEngine?: string;
    
      /**
       * The execution context/name (see {@link mmir.MediaManager.setDefaultCtx}) for the
       * speech synthesis functions (i.e. `mmir.media.tts()` | `mmir.media.cancelSpeech()`).
       *
       * @default undefined (i.e. use default context)
       */
      ttsEngine?: string;
    
      /**
       * Custom / default options for ASR (speech recognition):
       * note that the configuration for [[SpeechIoPluginConfigurationEntry]] supersedes these default options.
       *
       * NOTE: if specified, the same default options are used regardless of current language setting (see {@link mmir.LanguageManager#getLanguage})
       *
       * @default undefined
       */
      asrDefaultOptions?: Partial<ASROptions>;
    
      /**
       * Custom / default options for TTS (speech synthesis):
       * note: the `language` or `voice` options should not be set with this!
       *
       * (i.e. should only be used for (custom) options that are independent of the language setting)
       *
       * NOTE: if **not** specified per language, the default options should not contain any language-dependent settings (e.g. `voice`)
       *
       * @default undefined
       */
      ttsDefaultOptions?: Partial<TTSOptions> | {[languageCode: string]: Partial<TTSOptions>};
    
      /**
       * During active speech-input in 'dictation' mode:
       * if the stop word is detected as a single input/sentence, speech-input for the input-control will be stopped.
       *
       * The stop command is only applied if it matches the whole input/sentence, i.e.:
       * <pre>
       * isStopCommand("some sentence <stop word>") -> false
       * isStopCommand(" <stop word> ") -> true
       * </pre>
       *
       * Can either be set as a string, or as an object/dictionary that maps a
       * language ID to the stop-command.
       * <pre>
       * var stopCmd = {
       *   de: 'anhalten',
       *   en: 'stop'
       * }
       * </pre>
       *
       * NOTE can be set during runtime with:
       * <pre>
       * voiceUiService.ctrl.speechIn.setDictationCommand(dictStopWord, dictAbortWord);
       * </pre>
       * @default ""
       */
      dictStopWord?: string | {[languageId: string]: string};
    
      /**
       * Will only work if `dictStopWord` is also set!
       *
       * During active speech-input in 'dictation' mode:
       * if the abort word is detected as a single input/sentence, speech-input for the
       * input-control will be aborted and its text reverted to its previous state
       * (i.e. before dictation was started for the input-control).
       *
       * The abort command is only applied if it matches the whole input/sentence, i.e.:
       * <pre>
       * isAbortCommand("some sentence <abort word>") -> false
       * isAbortCommand(" <abort word> ") -> true
       * </pre>
       *
       * Can either be set as a string, or as an object/dictionary that maps a
       * language ID to the abort-command.
       * <pre>
       * var abortCmd = {
       *   de: 'rückgängig',
       *   en: 'undo'
       * }
       * </pre>
       *
       * NOTE can be set during runtime with:
       * <pre>
       * voiceUiService.ctrl.speechIn.setDictationCommand(dictStopWord, dictAbortWord);
       * </pre>
       *
       * IMPORTANT NOTE:
       * currently this feature requires that the original text (i.e. the text that
       * will be reverted to) is manually set on the text-element's `dataset` under
       * the key `original-text` (e.g. when starting dictation)!
       *
       * @default ""
       */
      dictAbortWord?: string | {[languageId: string]: string};
    
      /**
       * disable visual feedback for unstable dictation input in "pure text" input controls
       *
       * @default false
       */
      disableUnstableFeedback?: boolean;
    
      /**
       * enable/disable sound feedback for click/touch interactions
       *
       * @default true
       */
      soundFeedbackEnabled?: boolean;
    
      /**
       * enable/disable haptic feedback for click/touch interactions
       *
       * NOTE: haptic feedback (vibration) may not be supported for all execution
       *       environments/devices (will be ignored, if not supported)
       *
       * @default true
       */
      hapticFeedbackEnabled?: boolean;
    
      /**
       * print additional debug (console) output for speech I/O state-machine
       * @default false
       */
      showVuiDebugOutput?: boolean;
    
      /**
       * enable "barge in" during speech prompt (TTS / speech synthesis output):
       *
       * by default, speech input (i.e. recognition) will be disabled during speech synthesis
       * in order to prevent recording the synthesized speech.
       *
       * If echo-cancellation is used when recording the speech, or the synthesis is output through headphones
       * or similar, "barge in" can be enabled:
       * in this case the microphone stays open (i.e. speech recognition stays active) during speech synthesis.
       *
       * @default false
       */
      enableBargeIn?: boolean;
    
      /**
       * if enabled:
       * when speech-mode 'command' and 'guided-input' are active and a new view is entered
       * -> start input for the first "input-control" if auto-proceed is active
       *
       * NOTE 'guided-input' mode is not implemented yet
       *
       * @default false
       */
      inputCtrlAutoProceed?: boolean;
    
      /**
       * if enabled:
       * the `mmir-service` will not raise an 'init' event upon initialization on the `mmir.dialog` instance.
       *
       * Otherwise, the `mmir-service` will raise an 'init' event with event data:
       * ```
       * {
       *  appConfig: IAppSettings,
       *  mmir: ExtMmirModule<CmdImpl>,
       *  emma: EmmaUtil<CmdImpl>
       * }
       * ```
       * This event and its data can be used in the `dialog.xml` state definition's initial state by
       * defining a transition for the event `init` (see example).
       *
       * @default false
       * @example
       * <scxml xmlns="http://www.w3.org/2005/07/scxml" version="1.0"
       *        profile="ecmascript" id="scxmlRoot" initial="AppStart">
       * <state id="AppStart">
       *   <!-- transition for init-event, which in this example will trigger state-change to "MainApp" -->
       *   <transition event="init" target="MainApp">
       *    <script>
       *      // get event data contents:
       *      var appConfig = _event.data.appConfig;
       *      var mmir = _event.data.mmir;
       *      var emmaUtil = _event.data.emma;
       *      //... use them somehow (e.g. could be stored in a data model variable)
       *    </script>
       *   </transition>
       * </state>
       * <state id="MainApp">
       *   <!-- ... -->
       * </state>
       * </scxml>
       */
      preventDialogManagerInit?: boolean;
    
      /**
       * if a prompt is active (i.e. TTS is playing), when a new one is requested:
       * cancel the current/active one (and read the new one)?
       *
       * If `false`, the new prompt may be discarded, or cancel/replace the active one,
       * depending on the `ReadOptions` of the new prompt.
       *
       * @default true
       */
      cancelOnNewPrompt?: boolean;
    }
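
    For example, a configuration combining several of these options might look like the following sketch (the concrete stop/abort words and the mode-specific overrides are illustrative values, not defaults):

    const mmirAppConfig = {
      includePlugins: [
        {id: 'mmir-plugin-speech-io', config: {
          //use end-of-speech detection with a long pause:
          eos: true,
          longPause: true,
          //stop/abort words for 'dictation' mode, per language ID:
          dictStopWord: {de: 'anhalten', en: 'stop'},
          dictAbortWord: {de: 'rückgängig', en: 'undo'},
          //overrides that only apply in 'command' mode:
          command: {
            languageModel: 'search',
            alternativeResults: 2
          }
        }}
      ],
      //...
    };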

    Example Usage

    speechCommand

    speechCommand will be triggered if speech-input is in 'command' mode and a (stable) ASR result becomes available.

    1. register for speechCommand

      ...
      vuiCtrl.ctrl.enterView(mmirService.speechEvents.speechCommand.subscribe(result => this.evalSemantics(result)));
    2. parse the ASR result in speechCommand, create an appropriate interpretation, and trigger the commandAction

      public evalSemantics(emma: RecognitionEmma){

        // extract the (stable) ASR result text from the EMMA event object:
        const asrResult = this.mmir.emma._extractAsrData(emma);
        const text = asrResult.text;

        this.mmir.semantic.interpret(text, null, result => {

          let semantic: any;
          if(result.semantic != null) {
            semantic = result.semantic;
            semantic.phrase = text;
            if(this._debugMsg) console.log("semantic: ", result.semantic);//DEBUG
          } else {
            //create "no-match" semantic-object:
            semantic = {
              "NoMatch": {
                "phrase": text
              }
            };
          }

          this.mmir.emma.setSpeechUnderstanding(emma, semantic);

          // will trigger/emit commandAction:
          this.mmir.speechioInput.raise("speech", semantic);
        });
      }
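
    The raised "speech" event is processed by the plugin's input state-machine and emitted as a commandAction. A handler can subscribe to it analogously to speechCommand in step 1; a minimal sketch, assuming commandAction is exposed on speechEvents like the other speech events:

      mmirService.speechEvents.commandAction.subscribe(action => {
        // `action` carries the speech-understanding result, i.e. the semantic
        // object that was attached via setSpeechUnderstanding() above
        console.log('commandAction: ', action);
        //... dispatch the recognized command to the application logic
      });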

    Install

    npm i mmir-plugin-speech-io
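
    Since the plugin currently requires the mmir-webpack build process (see the note above), mmir-webpack would typically be installed alongside, e.g. as a dev dependency:

    npm i -D mmir-webpack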

    License

    MIT