AlexaClientSDK  3.0.0
A cross-platform, modular SDK for interacting with the Alexa Voice Service
AudioInputProcessor.h
Go to the documentation of this file.
1 /*
2  * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License").
5  * You may not use this file except in compliance with the License.
6  * A copy of the License is located at
7  *
8  * http://aws.amazon.com/apache2.0/
9  *
10  * or in the "license" file accompanying this file. This file is distributed
11  * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
12  * express or implied. See the License for the specific language governing
13  * permissions and limitations under the License.
14  */
15 
16 #ifndef ALEXA_CLIENT_SDK_CAPABILITYAGENTS_AIP_INCLUDE_AIP_AUDIOINPUTPROCESSOR_H_
17 #define ALEXA_CLIENT_SDK_CAPABILITYAGENTS_AIP_INCLUDE_AIP_AUDIOINPUTPROCESSOR_H_
18 
19 #include <chrono>
20 #include <map>
21 #include <memory>
22 #include <unordered_map>
23 #include <unordered_set>
24 #include <vector>
25 
61 
62 #include "AudioProvider.h"
63 #include "Initiator.h"
64 
65 namespace alexaClientSDK {
66 namespace capabilityAgents {
67 namespace aip {
68 
89  , public std::enable_shared_from_this<AudioInputProcessor> {
90 public:
93 
/// Alias for a @c std::map keyed by @c std::string whose mapped value is a @c std::pair of two
/// @c avsCommon::utils::AudioFormat::Encoding values. It is the argument type of
/// @c requestEncodingAudioFormats().
/// NOTE(review): the meaning of the key and of each pair element is not visible in this listing
/// (the original doc comment is elided) -- confirm against the SDK documentation.
100  using EncodingFormatRequest = std::
101  map<std::string, std::pair<avsCommon::utils::AudioFormat::Encoding, avsCommon::utils::AudioFormat::Encoding>>;
102 
/// Alias for a @c std::map from @c std::string to a single @c avsCommon::utils::AudioFormat::Encoding.
/// It is the return type of @c requestEncodingAudioFormats() and the type of @c m_encodingAudioFormats.
107  using EncodingFormatResponse = std::map<std::string, avsCommon::utils::AudioFormat::Encoding>;
108 
128 
/// A special keyword sent by supported wakeword engines for "Alexa, Stop".
130  static constexpr const char* KEYWORD_TEXT_STOP = "STOP";
131 
/// A reserved @c Index value which is considered invalid: the largest value representable by
/// @c avsCommon::avs::AudioInputStream::Index. It is the default for the @c begin and @c keywordEnd
/// arguments of @c recognize().
133  static const auto INVALID_INDEX = std::numeric_limits<avsCommon::avs::AudioInputStream::Index>::max();
134 
/**
 * Static factory returning a @c std::shared_ptr to an @c AudioInputProcessor.
 *
 * The first twelve parameters (@c directiveSequencer through @c capabilityChangeNotifier) have no
 * default and must be supplied by the caller. The remaining parameters are optional:
 *
 * @param wakeWordsSetting Optional; defaults to @c nullptr.
 * @param audioEncoder Optional; defaults to @c nullptr.
 * @param defaultAudioProvider Optional; defaults to @c AudioProvider::null().
 * @param powerResourceManager Optional; defaults to @c nullptr.
 * @param metricRecorder Optional; defaults to @c nullptr.
 * @param expectSpeechTimeoutHandler Optional; defaults to @c nullptr.
 * @return A @c std::shared_ptr<AudioInputProcessor>.
 *     NOTE(review): presumably @c nullptr when creation fails; the failure conditions are not
 *     visible in this header -- confirm in the implementation.
 */
165  static std::shared_ptr<AudioInputProcessor> create(
166  std::shared_ptr<avsCommon::sdkInterfaces::DirectiveSequencerInterface> directiveSequencer,
167  std::shared_ptr<avsCommon::sdkInterfaces::MessageSenderInterface> messageSender,
168  std::shared_ptr<avsCommon::sdkInterfaces::ContextManagerInterface> contextManager,
169  std::shared_ptr<avsCommon::sdkInterfaces::FocusManagerInterface> focusManager,
170  std::shared_ptr<avsCommon::avs::DialogUXStateAggregator> dialogUXStateAggregator,
171  std::shared_ptr<avsCommon::sdkInterfaces::ExceptionEncounteredSenderInterface> exceptionEncounteredSender,
172  std::shared_ptr<avsCommon::sdkInterfaces::UserInactivityMonitorInterface> userInactivityNotifier,
173  std::shared_ptr<avsCommon::sdkInterfaces::SystemSoundPlayerInterface> systemSoundPlayer,
174  const std::shared_ptr<avsCommon::sdkInterfaces::LocaleAssetsManagerInterface>& assetsManager,
175  std::shared_ptr<settings::WakeWordConfirmationSetting> wakeWordConfirmation,
176  std::shared_ptr<settings::SpeechConfirmationSetting> speechConfirmation,
177  const std::shared_ptr<avsCommon::avs::CapabilityChangeNotifierInterface>& capabilityChangeNotifier,
178  std::shared_ptr<settings::WakeWordsSetting> wakeWordsSetting = nullptr,
179  std::shared_ptr<audioEncoderInterfaces::AudioEncoderInterface> audioEncoder = nullptr,
180  AudioProvider defaultAudioProvider = AudioProvider::null(),
181  std::shared_ptr<avsCommon::sdkInterfaces::PowerResourceManagerInterface> powerResourceManager = nullptr,
182  std::shared_ptr<avsCommon::utils::metrics::MetricRecorderInterface> metricRecorder = nullptr,
183  const std::shared_ptr<ExpectSpeechTimeoutHandler>& expectSpeechTimeoutHandler = nullptr);
184 
/// Takes a shared pointer to an @c ObserverInterface; observers are stored in @c m_observers, an
/// unordered set of such pointers.
/// NOTE(review): presumably registers @c observer for AudioInputProcessor state notifications;
/// null handling and threading behaviour are not visible in this listing -- confirm.
190  void addObserver(std::shared_ptr<ObserverInterface> observer);
191 
/// Takes a shared pointer to an @c ObserverInterface, the same element type held in @c m_observers.
/// NOTE(review): presumably unregisters a previously added observer; behaviour for an unknown or
/// null @c observer is not visible in this listing -- confirm.
200  void removeObserver(std::shared_ptr<ObserverInterface> observer);
201 
256  std::future<bool> recognize(
257  AudioProvider audioProvider,
258  Initiator initiator,
259  std::chrono::steady_clock::time_point startOfSpeechTimestamp = std::chrono::steady_clock::now(),
262  std::string keyword = "",
263  std::shared_ptr<const std::vector<char>> KWDMetadata = nullptr,
264  const std::string& initiatorToken = "");
265 
274  std::future<bool> stopCapture();
275 
282  std::future<void> resetState();
283 
286  void onContextAvailable(const std::string& jsonContext) override;
289 
294  void onExceptionReceived(const std::string& exceptionMessage) override;
296 
/// Overridden handler callbacks; every function in this group is marked @c override.
/// NOTE(review): the base class that declares them is not visible in this listing (the class
/// header lines are elided) -- presumably CapabilityAgent or its directive-handler interface; confirm.
299  void handleDirectiveImmediately(std::shared_ptr<avsCommon::avs::AVSDirective> directive) override;
300  void preHandleDirective(std::shared_ptr<DirectiveInfo> info) override;
301  void handleDirective(std::shared_ptr<DirectiveInfo> info) override;
302  void cancelDirective(std::shared_ptr<DirectiveInfo> info) override;
303  void onDeregistered() override;
306 
311 
317 
/// Override returning an unordered set of shared pointers to @c CapabilityConfiguration, by value.
/// The return type matches the type of the @c m_capabilityConfigurations member.
/// NOTE(review): presumably returns that member's contents -- confirm in the implementation.
320  std::unordered_set<std::shared_ptr<avsCommon::avs::CapabilityConfiguration>> getCapabilityConfigurations() override;
322 
325  void onLocaleAssetsChanged() override;
327 
330  void onConnectionStatusChanged(bool connected) override;
332 
339 
346 
353 
363 
375 
381 
382 private:
388  bool initialize();
389 
423  std::shared_ptr<avsCommon::sdkInterfaces::DirectiveSequencerInterface> directiveSequencer,
424  std::shared_ptr<avsCommon::sdkInterfaces::MessageSenderInterface> messageSender,
425  std::shared_ptr<avsCommon::sdkInterfaces::ContextManagerInterface> contextManager,
426  std::shared_ptr<avsCommon::sdkInterfaces::FocusManagerInterface> focusManager,
427  std::shared_ptr<avsCommon::sdkInterfaces::ExceptionEncounteredSenderInterface> exceptionEncounteredSender,
428  std::shared_ptr<avsCommon::sdkInterfaces::UserInactivityMonitorInterface> userInactivityMonitor,
429  std::shared_ptr<avsCommon::sdkInterfaces::SystemSoundPlayerInterface> systemSoundPlayer,
430  const std::shared_ptr<avsCommon::sdkInterfaces::LocaleAssetsManagerInterface>& assetsManager,
431  std::shared_ptr<audioEncoderInterfaces::AudioEncoderInterface> audioEncoder,
432  AudioProvider defaultAudioProvider,
433  std::shared_ptr<settings::WakeWordConfirmationSetting> wakeWordConfirmation,
434  std::shared_ptr<settings::SpeechConfirmationSetting> speechConfirmation,
435  const std::shared_ptr<avsCommon::avs::CapabilityChangeNotifierInterface>& capabilityChangeNotifier,
436  std::shared_ptr<settings::WakeWordsSetting> wakeWordsSetting,
437  std::shared_ptr<avsCommon::avs::CapabilityConfiguration> capabilitiesConfiguration,
438  std::shared_ptr<avsCommon::sdkInterfaces::PowerResourceManagerInterface> powerResourceManager,
439  std::shared_ptr<avsCommon::utils::metrics::MetricRecorderInterface> metricRecorder,
440  const std::shared_ptr<ExpectSpeechTimeoutHandler>& expectSpeechTimeoutHandler);
441 
444  void doShutdown() override;
446 
454  std::future<bool> expectSpeechTimedOut();
455 
461  void handleStopCaptureDirective(std::shared_ptr<DirectiveInfo> info);
462 
468  void handleExpectSpeechDirective(std::shared_ptr<DirectiveInfo> info);
469 
475  void handleSetEndOfSpeechOffsetDirective(std::shared_ptr<DirectiveInfo> info);
476 
484  void handleDirectiveFailure(
485  const std::string& errorMessage,
486  std::shared_ptr<DirectiveInfo> info,
488 
496 
517  bool executeRecognize(
518  AudioProvider provider,
519  Initiator initiator,
520  std::chrono::steady_clock::time_point startOfSpeechTimestamp,
523  const std::string& keyword,
524  std::shared_ptr<const std::vector<char>> KWDMetadata,
525  const std::string& initiatorToken);
526 
554  bool executeRecognize(
555  AudioProvider provider,
556  const std::string& initiatorJson,
557  std::chrono::steady_clock::time_point startOfSpeechTimestamp = std::chrono::steady_clock::now(),
560  const std::string& keyword = "",
561  std::shared_ptr<const std::vector<char>> KWDMetadata = nullptr,
562  bool initiatedByWakeword = false,
563  bool falseWakewordDetection = false,
564  const std::string& initiatorString = "");
565 
575  void executeOnContextAvailable(const std::string& jsonContext);
576 
583  void executeOnContextFailure(const avsCommon::sdkInterfaces::ContextRequestError error);
584 
595  void executeOnFocusChanged(avsCommon::avs::FocusState newFocus);
596 
/// Private helper with two optional arguments: @c stopImmediately defaults to @c false and @c info
/// defaults to @c nullptr.
/// NOTE(review): the meaning of the @c bool result is not visible in this listing -- presumably
/// whether the stop was carried out; confirm in the implementation.
611  bool executeStopCapture(bool stopImmediately = false, std::shared_ptr<DirectiveInfo> info = nullptr);
612 
617  void executeResetState();
618 
632  bool executeExpectSpeech(std::chrono::milliseconds timeout, std::shared_ptr<DirectiveInfo> info);
633 
639  bool executeExpectSpeechTimedOut();
640 
647  void executeOnDialogUXStateChanged(
649 
653  void executeDisconnected();
654 
658  void executeOnLocaleAssetsChanged();
659 
666  void setState(ObserverInterface::State state);
667 
673  void removeDirective(std::shared_ptr<DirectiveInfo> info);
674 
676  void sendRequestNow();
677 
679 
686  bool handleSetWakeWordConfirmation(std::shared_ptr<DirectiveInfo> info);
687 
694  bool handleSetSpeechConfirmation(std::shared_ptr<DirectiveInfo> info);
695 
702  bool handleSetWakeWords(std::shared_ptr<DirectiveInfo> info);
703 
708  void managePowerResource(ObserverInterface::State newState);
709 
715  avsCommon::avs::MessageRequest::MessageRequestResolveFunction getMessageRequestResolverLocked() const;
716 
723  void closeAttachmentReaders(
726 
732  bool isEncodingFormatSupported(avsCommon::utils::AudioFormat::Encoding encodingFormat) const;
733 
739  bool isUsingEncoderLocked() const;
740 
746  bool multiStreamsRequestedLocked() const;
747 
749  std::shared_ptr<avsCommon::utils::metrics::MetricRecorderInterface> m_metricRecorder;
750 
752  std::shared_ptr<avsCommon::sdkInterfaces::DirectiveSequencerInterface> m_directiveSequencer;
753 
755  std::shared_ptr<avsCommon::sdkInterfaces::MessageSenderInterface> m_messageSender;
756 
758  std::shared_ptr<avsCommon::sdkInterfaces::ContextManagerInterface> m_contextManager;
759 
761  std::shared_ptr<avsCommon::sdkInterfaces::FocusManagerInterface> m_focusManager;
762 
764  std::shared_ptr<avsCommon::sdkInterfaces::UserInactivityMonitorInterface> m_userInactivityMonitor;
765 
/// A @c Timer instance (a timer to schedule tasks for delayed and periodic execution).
/// NOTE(review): going by its name, presumably drives the ExpectSpeech timeout -- confirm.
767  avsCommon::utils::timing::Timer m_expectingSpeechTimer;
768 
770  const std::shared_ptr<audioEncoderInterfaces::AudioEncoderInterface> m_encoder;
771 
778 
781  std::unordered_set<std::shared_ptr<ObserverInterface>> m_observers;
782 
787  AudioProvider m_defaultAudioProvider;
788 
793  AudioProvider m_lastAudioProvider;
794 
803  std::unordered_map<std::string, std::vector<std::shared_ptr<avsCommon::avs::MessageRequest::NamedReader>>>
804  m_attachmentReaders;
805 
811  std::string m_recognizePayload;
812 
819  std::shared_ptr<avsCommon::avs::MessageRequest> m_recognizeRequest;
820 
822  std::shared_ptr<avsCommon::avs::MessageRequest> m_recognizeRequestSent;
823 
/// Holds an @c ObserverInterface::State value (@c State enumerates the different states the
/// AudioInputProcessor can be in). @c setState() takes the same type.
825  ObserverInterface::State m_state;
826 
828  avsCommon::avs::FocusState m_focusState;
829 
834  bool m_preparingToSend;
835 
840  std::function<void()> m_deferredStopCapture;
841 
843  bool m_initialDialogUXStateReceived;
844 
849  bool m_localStopCapturePerformed;
850 
854  bool m_streamIsClosedInRecognizingState;
855 
857  std::shared_ptr<avsCommon::sdkInterfaces::SystemSoundPlayerInterface> m_systemSoundPlayer;
858 
860  std::shared_ptr<avsCommon::sdkInterfaces::LocaleAssetsManagerInterface> m_assetsManager;
861 
871  std::unique_ptr<std::string> m_precedingExpectSpeechInitiator;
873 
875  std::unordered_set<std::shared_ptr<avsCommon::avs::CapabilityConfiguration>> m_capabilityConfigurations;
876 
878  std::shared_ptr<settings::WakeWordConfirmationSetting> m_wakeWordConfirmation;
879 
881  std::shared_ptr<settings::SpeechConfirmationSetting> m_speechConfirmation;
882 
884  std::shared_ptr<avsCommon::avs::CapabilityChangeNotifierInterface> m_capabilityChangeNotifier;
885 
887  std::shared_ptr<settings::WakeWordsSetting> m_wakeWordsSetting;
888 
890  std::shared_ptr<avsCommon::sdkInterfaces::PowerResourceManagerInterface> m_powerResourceManager;
891 
893  std::chrono::steady_clock::time_point m_stopCaptureReceivedTime;
894 
899  std::shared_ptr<ExpectSpeechTimeoutHandler> m_expectSpeechTimeoutHandler;
900 
905  std::string m_preCachedDialogRequestId;
906 
910  std::chrono::milliseconds m_timeSinceLastResumeMS;
911 
915  std::chrono::milliseconds m_timeSinceLastPartialMS;
916 
921 
925  bool m_usingEncoder;
926 
930  std::mutex m_mutex;
931 
936 
944  EncodingFormatResponse m_encodingAudioFormats;
945 
/// Mutex declared @c mutable, so it can be locked from @c const member functions.
/// NOTE(review): presumably guards @c m_encodingAudioFormats -- confirm in the implementation.
949  mutable std::mutex m_encodingFormatMutex;
950 
954  unsigned int m_audioBytesForMetricThreshold;
955 
959  std::string m_uploadMetricName;
960 
964  avsCommon::utils::metrics::DataPointDurationBuilder m_fetchContextTimeMetricData;
965 
967  std::shared_ptr<avsCommon::sdkInterfaces::PowerResourceManagerInterface::PowerResourceId> m_powerResourceId;
968 
976 };
977 
978 } // namespace aip
979 } // namespace capabilityAgents
980 } // namespace alexaClientSDK
981 
982 #endif // ALEXA_CLIENT_SDK_CAPABILITYAGENTS_AIP_INCLUDE_AIP_AUDIOINPUTPROCESSOR_H_
Definition: CapabilityAgent.h:47
void preHandleDirective(std::shared_ptr< DirectiveInfo > info) override
void addObserver(std::shared_ptr< ObserverInterface > observer)
MixingBehavior
Definition: MixingBehavior.h:25
std::map< std::string, std::pair< avsCommon::utils::AudioFormat::Encoding, avsCommon::utils::AudioFormat::Encoding > > EncodingFormatRequest
Definition: AudioInputProcessor.h:101
Definition: SettingEventMetadata.h:27
static const auto INVALID_INDEX
A reserved Index value which is considered invalid.
Definition: AudioInputProcessor.h:133
void onSendCompleted(avsCommon::sdkInterfaces::MessageRequestObserverInterface::Status status) override
EncodingFormatResponse requestEncodingAudioFormats(const EncodingFormatRequest &encodings)
static settings::SettingEventMetadata getWakeWordsEventsMetadata()
static std::shared_ptr< AudioInputProcessor > create(std::shared_ptr< avsCommon::sdkInterfaces::DirectiveSequencerInterface > directiveSequencer, std::shared_ptr< avsCommon::sdkInterfaces::MessageSenderInterface > messageSender, std::shared_ptr< avsCommon::sdkInterfaces::ContextManagerInterface > contextManager, std::shared_ptr< avsCommon::sdkInterfaces::FocusManagerInterface > focusManager, std::shared_ptr< avsCommon::avs::DialogUXStateAggregator > dialogUXStateAggregator, std::shared_ptr< avsCommon::sdkInterfaces::ExceptionEncounteredSenderInterface > exceptionEncounteredSender, std::shared_ptr< avsCommon::sdkInterfaces::UserInactivityMonitorInterface > userInactivityNotifier, std::shared_ptr< avsCommon::sdkInterfaces::SystemSoundPlayerInterface > systemSoundPlayer, const std::shared_ptr< avsCommon::sdkInterfaces::LocaleAssetsManagerInterface > &assetsManager, std::shared_ptr< settings::WakeWordConfirmationSetting > wakeWordConfirmation, std::shared_ptr< settings::SpeechConfirmationSetting > speechConfirmation, const std::shared_ptr< avsCommon::avs::CapabilityChangeNotifierInterface > &capabilityChangeNotifier, std::shared_ptr< settings::WakeWordsSetting > wakeWordsSetting=nullptr, std::shared_ptr< audioEncoderInterfaces::AudioEncoderInterface > audioEncoder=nullptr, AudioProvider defaultAudioProvider=AudioProvider::null(), std::shared_ptr< avsCommon::sdkInterfaces::PowerResourceManagerInterface > powerResourceManager=nullptr, std::shared_ptr< avsCommon::utils::metrics::MetricRecorderInterface > metricRecorder=nullptr, const std::shared_ptr< ExpectSpeechTimeoutHandler > &expectSpeechTimeoutHandler=nullptr)
void onDialogUXStateChanged(avsCommon::sdkInterfaces::DialogUXStateObserverInterface::DialogUXState newState) override
::std::string string
Definition: gtest-port.h:1097
static const AudioProvider & null()
Definition: AudioProvider.h:171
ContextRequestError
Definition: ContextRequesterInterface.h:40
Single-thread executor implementation.
Definition: Executor.h:45
ExceptionErrorType
Definition: ExceptionErrorType.h:28
static settings::SettingEventMetadata getWakeWordConfirmationMetadata()
void onContextAvailable(const std::string &jsonContext) override
FocusState
Definition: FocusState.h:29
void onExceptionReceived(const std::string &exceptionMessage) override
Stop returning data when all of the data in the buffer at the time close() was called has been read...
DialogUXState
The different dialog specific AVS UX states.
Definition: DialogUXStateObserverInterface.h:32
static std::shared_ptr< avsCommon::utils::metrics::MetricRecorderInterface > metricRecorder
Metric recorder shared ptr.
Definition: BaseAPLCapabilityAgentTest.cpp:261
void onContextFailure(const avsCommon::sdkInterfaces::ContextRequestError error) override
State
The different states the AudioInputProcessor can be in.
Definition: AudioInputProcessorObserverInterface.h:30
void handleDirectiveImmediately(std::shared_ptr< avsCommon::avs::AVSDirective > directive) override
Initiator
Definition: Initiator.h:28
bool setEncodingAudioFormat(avsCommon::utils::AudioFormat::Encoding encoding)
std::unordered_map< directiveRoutingRule::DirectiveRoutingRule, BlockingPolicy > DirectiveHandlerConfiguration
Definition: DirectiveHandlerConfiguration.h:32
Timer to schedule task for delayed and periodic execution.
Definition: Timer.h:39
void onResponseStatusReceived(avsCommon::sdkInterfaces::MessageRequestObserverInterface::Status status) override
void removeObserver(std::shared_ptr< ObserverInterface > observer)
Index
Index used for setting access.
Definition: StateReportGeneratorTest.cpp:41
std::map< std::string, avsCommon::utils::AudioFormat::Encoding > EncodingFormatResponse
Definition: AudioInputProcessor.h:107
Whether or not curl logs should be emitted.
Definition: AVSConnectionManager.h:36
static constexpr const char * KEYWORD_TEXT_STOP
A special keyword sent by supported wakeword engines for "Alexa, Stop".
Definition: AudioInputProcessor.h:130
A state observer for an AudioInputProcessor.
Definition: AudioInputProcessorObserverInterface.h:27
std::unordered_set< std::shared_ptr< avsCommon::avs::CapabilityConfiguration > > getCapabilityConfigurations() override
avsCommon::avs::DirectiveHandlerConfiguration getConfiguration() const override
std::future< bool > recognize(AudioProvider audioProvider, Initiator initiator, std::chrono::steady_clock::time_point startOfSpeechTimestamp=std::chrono::steady_clock::now(), avsCommon::avs::AudioInputStream::Index begin=INVALID_INDEX, avsCommon::avs::AudioInputStream::Index keywordEnd=INVALID_INDEX, std::string keyword="", std::shared_ptr< const std::vector< char >> KWDMetadata=nullptr, const std::string &initiatorToken="")
std::function< bool(const std::shared_ptr< EditableMessageRequest > &req, const std::string &resolveKey)> MessageRequestResolveFunction
Definition: MessageRequest.h:92
Definition: InternetConnectionObserverInterface.h:26
Status
Definition: MessageRequestObserverInterface.h:33
Definition: ExpectSpeechTimeoutHandlerInterface.h:31
std::bitset< PowerResourceTypeIndex::NUM_OF_TYPES > PartialStateBitSet
Definition: PowerResourceManagerInterface.h:94
ClosePoint
An enum class to indicate when the read() function should stop returning data after a call to close()...
Definition: AttachmentReader.h:59
static settings::SettingEventMetadata getSpeechConfirmationMetadata()
void cancelDirective(std::shared_ptr< DirectiveInfo > info) override
A dialog specific UX state observer.
Definition: DialogUXStateObserverInterface.h:29
void onFocusChanged(avsCommon::avs::FocusState newFocus, avsCommon::avs::MixingBehavior behavior) override
void handleDirective(std::shared_ptr< DirectiveInfo > info) override

AlexaClientSDK 3.0.0 - Copyright 2016-2022 Amazon.com, Inc. or its affiliates. All Rights Reserved. Licensed under the Apache License, Version 2.0