AlexaClientSDK  1.25.0
A cross-platform, modular SDK for interacting with the Alexa Voice Service
AudioInputProcessor.h
Go to the documentation of this file.
1 /*
2  * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License").
5  * You may not use this file except in compliance with the License.
6  * A copy of the License is located at
7  *
8  * http://aws.amazon.com/apache2.0/
9  *
10  * or in the "license" file accompanying this file. This file is distributed
11  * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
12  * express or implied. See the License for the specific language governing
13  * permissions and limitations under the License.
14  */
15 
16 #ifndef ALEXA_CLIENT_SDK_CAPABILITYAGENTS_AIP_INCLUDE_AIP_AUDIOINPUTPROCESSOR_H_
17 #define ALEXA_CLIENT_SDK_CAPABILITYAGENTS_AIP_INCLUDE_AIP_AUDIOINPUTPROCESSOR_H_
18 
19 #include <chrono>
20 #include <map>
21 #include <memory>
22 #include <unordered_map>
23 #include <unordered_set>
24 #include <vector>
25 
60 
61 #include "AudioProvider.h"
62 #include "Initiator.h"
63 
64 namespace alexaClientSDK {
65 namespace capabilityAgents {
66 namespace aip {
67 
88  , public std::enable_shared_from_this<AudioInputProcessor> {
89 public:
92 
// Alias for an ordered map from a std::string key to a pair of AudioFormat::Encoding values.
// It is the argument type of requestEncodingAudioFormats().
// NOTE(review): what the string key identifies, and what the first/second encodings of the
// pair stand for, is not shown in this listing -- confirm against the original header comment.
99  using EncodingFormatRequest = std::
100  map<std::string, std::pair<avsCommon::utils::AudioFormat::Encoding, avsCommon::utils::AudioFormat::Encoding>>;
101 
// Alias for an ordered map from a std::string key to a single AudioFormat::Encoding.
// It is the return type of requestEncodingAudioFormats() and getEncodingAudioFormats(), and the
// type of the m_encodingAudioFormats member.
// NOTE(review): the meaning of the string key is not shown in this listing -- confirm.
106  using EncodingFormatResponse = std::map<std::string, avsCommon::utils::AudioFormat::Encoding>;
107 
127 
// A special keyword sent by supported wakeword engines for "Alexa, Stop".
129  static constexpr const char* KEYWORD_TEXT_STOP = "STOP";
130 
// A reserved Index value which is considered invalid: the largest value representable by
// avsCommon::avs::AudioInputStream::Index. recognize() uses it as the default for its
// begin and keywordEnd stream-index parameters.
132  static const auto INVALID_INDEX = std::numeric_limits<avsCommon::avs::AudioInputStream::Index>::max();
133 
// Static factory: returns a std::shared_ptr<AudioInputProcessor> assembled from the supplied
// collaborators.
//
// The first twelve parameters (directiveSequencer .. capabilityChangeNotifier) have no default
// and must always be passed. The remaining six are optional: wakeWordsSetting, speechEncoder,
// powerResourceManager, metricRecorder and expectSpeechTimeoutHandler default to nullptr, and
// defaultAudioProvider defaults to AudioProvider::null().
//
// NOTE(review): what happens when a required argument is null (e.g. whether nullptr is
// returned) is not visible in this header listing -- confirm in AudioInputProcessor.cpp.
164  static std::shared_ptr<AudioInputProcessor> create(
165  std::shared_ptr<avsCommon::sdkInterfaces::DirectiveSequencerInterface> directiveSequencer,
166  std::shared_ptr<avsCommon::sdkInterfaces::MessageSenderInterface> messageSender,
167  std::shared_ptr<avsCommon::sdkInterfaces::ContextManagerInterface> contextManager,
168  std::shared_ptr<avsCommon::sdkInterfaces::FocusManagerInterface> focusManager,
169  std::shared_ptr<avsCommon::avs::DialogUXStateAggregator> dialogUXStateAggregator,
170  std::shared_ptr<avsCommon::sdkInterfaces::ExceptionEncounteredSenderInterface> exceptionEncounteredSender,
171  std::shared_ptr<avsCommon::sdkInterfaces::UserInactivityMonitorInterface> userInactivityNotifier,
172  std::shared_ptr<avsCommon::sdkInterfaces::SystemSoundPlayerInterface> systemSoundPlayer,
173  const std::shared_ptr<avsCommon::sdkInterfaces::LocaleAssetsManagerInterface>& assetsManager,
174  std::shared_ptr<settings::WakeWordConfirmationSetting> wakeWordConfirmation,
175  std::shared_ptr<settings::SpeechConfirmationSetting> speechConfirmation,
176  const std::shared_ptr<avsCommon::avs::CapabilityChangeNotifierInterface>& capabilityChangeNotifier,
177  std::shared_ptr<settings::WakeWordsSetting> wakeWordsSetting = nullptr,
178  std::shared_ptr<speechencoder::SpeechEncoder> speechEncoder = nullptr,
179  AudioProvider defaultAudioProvider = AudioProvider::null(),
180  std::shared_ptr<avsCommon::sdkInterfaces::PowerResourceManagerInterface> powerResourceManager = nullptr,
181  std::shared_ptr<avsCommon::utils::metrics::MetricRecorderInterface> metricRecorder = nullptr,
182  const std::shared_ptr<ExpectSpeechTimeoutHandler>& expectSpeechTimeoutHandler = nullptr);
183 
189  void addObserver(std::shared_ptr<ObserverInterface> observer);
190 
199  void removeObserver(std::shared_ptr<ObserverInterface> observer);
200 
// Starts a recognize request for the given audio provider and initiator; the outcome is
// delivered through the returned std::future<bool>.
// NOTE(review): the bool presumably reports whether the request was accepted -- confirm in
// AudioInputProcessor.cpp.
//
// startOfSpeechTimestamp defaults to std::chrono::steady_clock::now(), which (being a default
// argument) is evaluated at each call that omits it.
//
// NOTE(review): this listing skips source lines 251-252. The documented signature of
// recognize() has two further defaulted parameters between startOfSpeechTimestamp and keyword:
//     avsCommon::avs::AudioInputStream::Index begin = INVALID_INDEX,
//     avsCommon::avs::AudioInputStream::Index keywordEnd = INVALID_INDEX,
// Do not treat the parameter list below as complete.
247  std::future<bool> recognize(
248  AudioProvider audioProvider,
249  Initiator initiator,
250  std::chrono::steady_clock::time_point startOfSpeechTimestamp = std::chrono::steady_clock::now(),
253  std::string keyword = "",
254  std::shared_ptr<const std::vector<char>> KWDMetadata = nullptr,
255  const std::string& initiatorToken = "");
256 
265  std::future<bool> stopCapture();
266 
273  std::future<void> resetState();
274 
277  void onContextAvailable(const std::string& jsonContext) override;
280 
285  void onExceptionReceived(const std::string& exceptionMessage) override;
287 
290  void handleDirectiveImmediately(std::shared_ptr<avsCommon::avs::AVSDirective> directive) override;
291  void preHandleDirective(std::shared_ptr<DirectiveInfo> info) override;
292  void handleDirective(std::shared_ptr<DirectiveInfo> info) override;
293  void cancelDirective(std::shared_ptr<DirectiveInfo> info) override;
294  void onDeregistered() override;
297 
302 
308 
311  std::unordered_set<std::shared_ptr<avsCommon::avs::CapabilityConfiguration>> getCapabilityConfigurations() override;
313 
316  void onLocaleAssetsChanged() override;
318 
321  void onConnectionStatusChanged(bool connected) override;
323 
330 
337 
344 
354 
366 
372 
373 private:
379  bool initialize();
380 
414  std::shared_ptr<avsCommon::sdkInterfaces::DirectiveSequencerInterface> directiveSequencer,
415  std::shared_ptr<avsCommon::sdkInterfaces::MessageSenderInterface> messageSender,
416  std::shared_ptr<avsCommon::sdkInterfaces::ContextManagerInterface> contextManager,
417  std::shared_ptr<avsCommon::sdkInterfaces::FocusManagerInterface> focusManager,
418  std::shared_ptr<avsCommon::sdkInterfaces::ExceptionEncounteredSenderInterface> exceptionEncounteredSender,
419  std::shared_ptr<avsCommon::sdkInterfaces::UserInactivityMonitorInterface> userInactivityMonitor,
420  std::shared_ptr<avsCommon::sdkInterfaces::SystemSoundPlayerInterface> systemSoundPlayer,
421  const std::shared_ptr<avsCommon::sdkInterfaces::LocaleAssetsManagerInterface>& assetsManager,
422  std::shared_ptr<speechencoder::SpeechEncoder> speechEncoder,
423  AudioProvider defaultAudioProvider,
424  std::shared_ptr<settings::WakeWordConfirmationSetting> wakeWordConfirmation,
425  std::shared_ptr<settings::SpeechConfirmationSetting> speechConfirmation,
426  const std::shared_ptr<avsCommon::avs::CapabilityChangeNotifierInterface>& capabilityChangeNotifier,
427  std::shared_ptr<settings::WakeWordsSetting> wakeWordsSetting,
428  std::shared_ptr<avsCommon::avs::CapabilityConfiguration> capabilitiesConfiguration,
429  std::shared_ptr<avsCommon::sdkInterfaces::PowerResourceManagerInterface> powerResourceManager,
430  std::shared_ptr<avsCommon::utils::metrics::MetricRecorderInterface> metricRecorder,
431  const std::shared_ptr<ExpectSpeechTimeoutHandler>& expectSpeechTimeoutHandler);
432 
435  void doShutdown() override;
437 
445  std::future<bool> expectSpeechTimedOut();
446 
452  void handleStopCaptureDirective(std::shared_ptr<DirectiveInfo> info);
453 
459  void handleExpectSpeechDirective(std::shared_ptr<DirectiveInfo> info);
460 
466  void handleSetEndOfSpeechOffsetDirective(std::shared_ptr<DirectiveInfo> info);
467 
475  void handleDirectiveFailure(
476  const std::string& errorMessage,
477  std::shared_ptr<DirectiveInfo> info,
479 
487 
// Private overload of executeRecognize() taking an Initiator value; its parameters mirror those
// of the public recognize() but none of them is defaulted.
// NOTE(review): source lines 512-513 are missing from this listing (the numbering jumps from
// 511 to 514). Presumably they hold the begin / keywordEnd stream-index parameters, as in
// recognize() -- confirm against the original header before relying on this parameter list.
508  bool executeRecognize(
509  AudioProvider provider,
510  Initiator initiator,
511  std::chrono::steady_clock::time_point startOfSpeechTimestamp,
514  const std::string& keyword,
515  std::shared_ptr<const std::vector<char>> KWDMetadata,
516  const std::string& initiatorToken);
517 
// Private overload of executeRecognize() taking the initiator as a JSON string instead of an
// Initiator value. Only provider and initiatorJson are required; every later parameter is
// defaulted (timestamp to steady_clock::now() at the call, strings to "", KWDMetadata to
// nullptr, both bool flags to false).
// NOTE(review): source lines 549-550 are missing from this listing (the numbering jumps from
// 548 to 551). Presumably they hold the begin / keywordEnd stream-index parameters, as in
// recognize() -- confirm against the original header before relying on this parameter list.
545  bool executeRecognize(
546  AudioProvider provider,
547  const std::string& initiatorJson,
548  std::chrono::steady_clock::time_point startOfSpeechTimestamp = std::chrono::steady_clock::now(),
551  const std::string& keyword = "",
552  std::shared_ptr<const std::vector<char>> KWDMetadata = nullptr,
553  bool initiatedByWakeword = false,
554  bool falseWakewordDetection = false,
555  const std::string& initiatorString = "");
556 
566  void executeOnContextAvailable(const std::string& jsonContext);
567 
574  void executeOnContextFailure(const avsCommon::sdkInterfaces::ContextRequestError error);
575 
586  void executeOnFocusChanged(avsCommon::avs::FocusState newFocus);
587 
602  bool executeStopCapture(bool stopImmediately = false, std::shared_ptr<DirectiveInfo> info = nullptr);
603 
608  void executeResetState();
609 
623  bool executeExpectSpeech(std::chrono::milliseconds timeout, std::shared_ptr<DirectiveInfo> info);
624 
630  bool executeExpectSpeechTimedOut();
631 
638  void executeOnDialogUXStateChanged(
640 
644  void executeDisconnected();
645 
649  void executeOnLocaleAssetsChanged();
650 
657  void setState(ObserverInterface::State state);
658 
664  void removeDirective(std::shared_ptr<DirectiveInfo> info);
665 
667  void sendRequestNow();
668 
670 
677  bool handleSetWakeWordConfirmation(std::shared_ptr<DirectiveInfo> info);
678 
685  bool handleSetSpeechConfirmation(std::shared_ptr<DirectiveInfo> info);
686 
693  bool handleSetWakeWords(std::shared_ptr<DirectiveInfo> info);
694 
699  void managePowerResource(ObserverInterface::State newState);
700 
706  avsCommon::avs::MessageRequest::MessageRequestResolveFunction getMessageRequestResolverLocked() const;
707 
714  void closeAttachmentReaders(
717 
723  bool isEncodingFormatSupported(avsCommon::utils::AudioFormat::Encoding encodingFormat) const;
724 
730  bool isUsingEncoderLocked() const;
731 
737  bool multiStreamsRequestedLocked() const;
738 
740  std::shared_ptr<avsCommon::utils::metrics::MetricRecorderInterface> m_metricRecorder;
741 
743  std::shared_ptr<avsCommon::sdkInterfaces::DirectiveSequencerInterface> m_directiveSequencer;
744 
746  std::shared_ptr<avsCommon::sdkInterfaces::MessageSenderInterface> m_messageSender;
747 
749  std::shared_ptr<avsCommon::sdkInterfaces::ContextManagerInterface> m_contextManager;
750 
752  std::shared_ptr<avsCommon::sdkInterfaces::FocusManagerInterface> m_focusManager;
753 
755  std::shared_ptr<avsCommon::sdkInterfaces::UserInactivityMonitorInterface> m_userInactivityMonitor;
756 
758  avsCommon::utils::timing::Timer m_expectingSpeechTimer;
759 
761  const std::shared_ptr<speechencoder::SpeechEncoder> m_encoder;
762 
769 
772  std::unordered_set<std::shared_ptr<ObserverInterface>> m_observers;
773 
778  AudioProvider m_defaultAudioProvider;
779 
784  AudioProvider m_lastAudioProvider;
785 
794  std::unordered_map<std::string, std::vector<std::shared_ptr<avsCommon::avs::MessageRequest::NamedReader>>>
795  m_attachmentReaders;
796 
802  std::string m_recognizePayload;
803 
810  std::shared_ptr<avsCommon::avs::MessageRequest> m_recognizeRequest;
811 
813  std::shared_ptr<avsCommon::avs::MessageRequest> m_recognizeRequestSent;
814 
816  ObserverInterface::State m_state;
817 
819  avsCommon::avs::FocusState m_focusState;
820 
825  bool m_preparingToSend;
826 
831  std::function<void()> m_deferredStopCapture;
832 
834  bool m_initialDialogUXStateReceived;
835 
840  bool m_localStopCapturePerformed;
841 
845  bool m_streamIsClosedInRecognizingState;
846 
848  std::shared_ptr<avsCommon::sdkInterfaces::SystemSoundPlayerInterface> m_systemSoundPlayer;
849 
851  std::shared_ptr<avsCommon::sdkInterfaces::LocaleAssetsManagerInterface> m_assetsManager;
852 
862  std::unique_ptr<std::string> m_precedingExpectSpeechInitiator;
864 
866  std::unordered_set<std::shared_ptr<avsCommon::avs::CapabilityConfiguration>> m_capabilityConfigurations;
867 
869  std::shared_ptr<settings::WakeWordConfirmationSetting> m_wakeWordConfirmation;
870 
872  std::shared_ptr<settings::SpeechConfirmationSetting> m_speechConfirmation;
873 
875  std::shared_ptr<avsCommon::avs::CapabilityChangeNotifierInterface> m_capabilityChangeNotifier;
876 
878  std::shared_ptr<settings::WakeWordsSetting> m_wakeWordsSetting;
879 
881  std::shared_ptr<avsCommon::sdkInterfaces::PowerResourceManagerInterface> m_powerResourceManager;
882 
884  std::chrono::steady_clock::time_point m_stopCaptureReceivedTime;
885 
890  std::shared_ptr<ExpectSpeechTimeoutHandler> m_expectSpeechTimeoutHandler;
891 
896  std::string m_preCachedDialogRequestId;
897 
901  std::chrono::milliseconds m_timeSinceLastResumeMS;
902 
906  std::chrono::milliseconds m_timeSinceLastPartialMS;
907 
911  bool m_usingEncoder;
912 
916  std::mutex m_mutex;
917 
922 
930  EncodingFormatResponse m_encodingAudioFormats;
931 
935  mutable std::mutex m_encodingFormatMutex;
936 
940  unsigned int m_audioBytesForMetricThreshold;
941 
945  std::string m_uploadMetricName;
946 
948  std::shared_ptr<avsCommon::sdkInterfaces::PowerResourceManagerInterface::PowerResourceId> m_powerResourceId;
949 
957 };
958 
959 } // namespace aip
960 } // namespace capabilityAgents
961 } // namespace alexaClientSDK
962 
963 #endif // ALEXA_CLIENT_SDK_CAPABILITYAGENTS_AIP_INCLUDE_AIP_AUDIOINPUTPROCESSOR_H_
void onConnectionStatusChanged(bool connected) override
Definition: AudioInputProcessor.cpp:1810
Definition: CapabilityAgent.h:47
void preHandleDirective(std::shared_ptr< DirectiveInfo > info) override
Definition: AudioInputProcessor.cpp:540
void addObserver(std::shared_ptr< ObserverInterface > observer)
Definition: AudioInputProcessor.cpp:453
MixingBehavior
Definition: MixingBehavior.h:25
std::map< std::string, std::pair< avsCommon::utils::AudioFormat::Encoding, avsCommon::utils::AudioFormat::Encoding > > EncodingFormatRequest
Definition: AudioInputProcessor.h:100
Definition: SettingEventMetadata.h:27
static const auto INVALID_INDEX
A reserved Index value which is considered invalid.
Definition: AudioInputProcessor.h:132
void onSendCompleted(avsCommon::sdkInterfaces::MessageRequestObserverInterface::Status status) override
Definition: AudioInputProcessor.cpp:1624
void onDialogUXStateChanged(avsCommon::sdkInterfaces::DialogUXStateObserverInterface::DialogUXState newState) override
Definition: AudioInputProcessor.cpp:603
static const AudioProvider & null()
Definition: AudioProvider.h:104
ContextRequestError
Definition: ContextRequesterInterface.h:40
ExceptionErrorType
Definition: ExceptionErrorType.h:28
void onContextAvailable(const std::string &jsonContext) override
Definition: AudioInputProcessor.cpp:528
FocusState
Definition: FocusState.h:29
void onExceptionReceived(const std::string &exceptionMessage) override
Definition: AudioInputProcessor.cpp:1619
std::future< bool > stopCapture()
Definition: AudioInputProcessor.cpp:519
Stop returning data when all of the data in the buffer at the time close() was called has been read...
static settings::SettingEventMetadata getSpeechConfirmationMetadata()
Definition: AudioInputProcessor.cpp:1682
DialogUXState
The different dialog specific AVS UX states.
Definition: DialogUXStateObserverInterface.h:32
void onContextFailure(const avsCommon::sdkInterfaces::ContextRequestError error) override
Definition: AudioInputProcessor.cpp:532
void onLocaleAssetsChanged() override
Definition: AudioInputProcessor.cpp:1641
State
The different states the AudioInputProcessor can be in.
Definition: AudioInputProcessorObserverInterface.h:30
void handleDirectiveImmediately(std::shared_ptr< avsCommon::avs::AVSDirective > directive) override
Definition: AudioInputProcessor.cpp:536
EncodingFormatResponse getEncodingAudioFormats() const
Definition: AudioInputProcessor.cpp:1917
Initiator
Definition: Initiator.h:28
bool setEncodingAudioFormat(avsCommon::utils::AudioFormat::Encoding encoding)
Definition: AudioInputProcessor.cpp:1823
std::unordered_map< directiveRoutingRule::DirectiveRoutingRule, BlockingPolicy > DirectiveHandlerConfiguration
Definition: DirectiveHandlerConfiguration.h:32
static settings::SettingEventMetadata getWakeWordsEventsMetadata()
Definition: AudioInputProcessor.cpp:714
std::unordered_set< std::shared_ptr< avsCommon::avs::CapabilityConfiguration > > getCapabilityConfigurations() override
Definition: AudioInputProcessor.cpp:1636
void onResponseStatusReceived(avsCommon::sdkInterfaces::MessageRequestObserverInterface::Status status) override
Definition: AudioInputProcessor.cpp:1611
void removeObserver(std::shared_ptr< ObserverInterface > observer)
Definition: AudioInputProcessor.cpp:461
Index
Index used for setting access.
Definition: StateReportGeneratorTest.cpp:41
std::map< std::string, avsCommon::utils::AudioFormat::Encoding > EncodingFormatResponse
Definition: AudioInputProcessor.h:106
Whether or not curl logs should be emitted.
Definition: AVSConnectionManager.h:36
static constexpr const char * KEYWORD_TEXT_STOP
A special keyword sent by supported wakeword engines for "Alexa, Stop".
Definition: AudioInputProcessor.h:129
A state observer for an AudioInputProcessor.
Definition: AudioInputProcessorObserverInterface.h:27
std::future< void > resetState()
Definition: AudioInputProcessor.cpp:523
avsCommon::avs::DirectiveHandlerConfiguration getConfiguration() const override
Definition: AudioInputProcessor.cpp:442
EncodingFormatResponse requestEncodingAudioFormats(const EncodingFormatRequest &encodings)
Definition: AudioInputProcessor.cpp:1858
std::function< bool(const std::shared_ptr< EditableMessageRequest > &req, const std::string &resolveKey)> MessageRequestResolveFunction
Definition: MessageRequest.h:70
Definition: InternetConnectionObserverInterface.h:26
std::future< bool > recognize(AudioProvider audioProvider, Initiator initiator, std::chrono::steady_clock::time_point startOfSpeechTimestamp=std::chrono::steady_clock::now(), avsCommon::avs::AudioInputStream::Index begin=INVALID_INDEX, avsCommon::avs::AudioInputStream::Index keywordEnd=INVALID_INDEX, std::string keyword="", std::shared_ptr< const std::vector< char >> KWDMetadata=nullptr, const std::string &initiatorToken="")
Definition: AudioInputProcessor.cpp:469
static std::shared_ptr< AudioInputProcessor > create(std::shared_ptr< avsCommon::sdkInterfaces::DirectiveSequencerInterface > directiveSequencer, std::shared_ptr< avsCommon::sdkInterfaces::MessageSenderInterface > messageSender, std::shared_ptr< avsCommon::sdkInterfaces::ContextManagerInterface > contextManager, std::shared_ptr< avsCommon::sdkInterfaces::FocusManagerInterface > focusManager, std::shared_ptr< avsCommon::avs::DialogUXStateAggregator > dialogUXStateAggregator, std::shared_ptr< avsCommon::sdkInterfaces::ExceptionEncounteredSenderInterface > exceptionEncounteredSender, std::shared_ptr< avsCommon::sdkInterfaces::UserInactivityMonitorInterface > userInactivityNotifier, std::shared_ptr< avsCommon::sdkInterfaces::SystemSoundPlayerInterface > systemSoundPlayer, const std::shared_ptr< avsCommon::sdkInterfaces::LocaleAssetsManagerInterface > &assetsManager, std::shared_ptr< settings::WakeWordConfirmationSetting > wakeWordConfirmation, std::shared_ptr< settings::SpeechConfirmationSetting > speechConfirmation, const std::shared_ptr< avsCommon::avs::CapabilityChangeNotifierInterface > &capabilityChangeNotifier, std::shared_ptr< settings::WakeWordsSetting > wakeWordsSetting=nullptr, std::shared_ptr< speechencoder::SpeechEncoder > speechEncoder=nullptr, AudioProvider defaultAudioProvider=AudioProvider::null(), std::shared_ptr< avsCommon::sdkInterfaces::PowerResourceManagerInterface > powerResourceManager=nullptr, std::shared_ptr< avsCommon::utils::metrics::MetricRecorderInterface > metricRecorder=nullptr, const std::shared_ptr< ExpectSpeechTimeoutHandler > &expectSpeechTimeoutHandler=nullptr)
Definition: AudioInputProcessor.cpp:346
void onDeregistered() override
Definition: AudioInputProcessor.cpp:594
Status
Definition: MessageRequestObserverInterface.h:33
Definition: ExpectSpeechTimeoutHandlerInterface.h:31
ClosePoint
An enum class to indicate when the read() function should stop returning data after a call to close()...
Definition: AttachmentReader.h:59
void cancelDirective(std::shared_ptr< DirectiveInfo > info) override
Definition: AudioInputProcessor.cpp:590
static settings::SettingEventMetadata getWakeWordConfirmationMetadata()
Definition: AudioInputProcessor.cpp:1675
A dialog specific UX state observer.
Definition: DialogUXStateObserverInterface.h:29
void onFocusChanged(avsCommon::avs::FocusState newFocus, avsCommon::avs::MixingBehavior behavior) override
Definition: AudioInputProcessor.cpp:598
void handleDirective(std::shared_ptr< DirectiveInfo > info) override
Definition: AudioInputProcessor.cpp:543

AlexaClientSDK 1.25.0 - Copyright 2016-2021 Amazon.com, Inc. or its affiliates. All Rights Reserved. Licensed under the Apache License, Version 2.0