![]() |
AlexaClientSDK
3.0.0
A cross-platform, modular SDK for interacting with the Alexa Voice Service
|
#include <AudioInputProcessor.h>
Public Types | |
using | ObserverInterface = avsCommon::sdkInterfaces::AudioInputProcessorObserverInterface |
Alias to the AudioInputProcessorObserverInterface for brevity. More... | |
using | EncodingFormatRequest = std::map< std::string, std::pair< avsCommon::utils::AudioFormat::Encoding, avsCommon::utils::AudioFormat::Encoding > > |
using | EncodingFormatResponse = std::map< std::string, avsCommon::utils::AudioFormat::Encoding > |
using | ExpectSpeechTimeoutHandler = avsCommon::sdkInterfaces::ExpectSpeechTimeoutHandlerInterface |
![]() | |
enum | DialogUXState { DialogUXState::IDLE, DialogUXState::LISTENING, DialogUXState::EXPECTING, DialogUXState::THINKING, DialogUXState::SPEAKING, DialogUXState::FINISHED } |
The different dialog specific AVS UX states. More... | |
Public Member Functions | |
void | addObserver (std::shared_ptr< ObserverInterface > observer) |
void | removeObserver (std::shared_ptr< ObserverInterface > observer) |
std::future< bool > | recognize (AudioProvider audioProvider, Initiator initiator, std::chrono::steady_clock::time_point startOfSpeechTimestamp=std::chrono::steady_clock::now(), avsCommon::avs::AudioInputStream::Index begin=INVALID_INDEX, avsCommon::avs::AudioInputStream::Index keywordEnd=INVALID_INDEX, std::string keyword="", std::shared_ptr< const std::vector< char >> KWDMetadata=nullptr, const std::string &initiatorToken="") |
std::future< bool > | stopCapture () |
std::future< void > | resetState () |
bool | setEncodingAudioFormat (avsCommon::utils::AudioFormat::Encoding encoding) |
EncodingFormatResponse | requestEncodingAudioFormats (const EncodingFormatRequest &encodings) |
EncodingFormatResponse | getEncodingAudioFormats () const |
ContextRequesterInterface Functions | |
void | onContextAvailable (const std::string &jsonContext) override |
void | onContextFailure (const avsCommon::sdkInterfaces::ContextRequestError error) override |
MessageRequestObserverInterface Functions | |
void | onResponseStatusReceived (avsCommon::sdkInterfaces::MessageRequestObserverInterface::Status status) override |
void | onSendCompleted (avsCommon::sdkInterfaces::MessageRequestObserverInterface::Status status) override |
void | onExceptionReceived (const std::string &exceptionMessage) override |
CapabilityAgent/DirectiveHandlerInterface Functions | |
void | handleDirectiveImmediately (std::shared_ptr< avsCommon::avs::AVSDirective > directive) override |
void | preHandleDirective (std::shared_ptr< DirectiveInfo > info) override |
void | handleDirective (std::shared_ptr< DirectiveInfo > info) override |
void | cancelDirective (std::shared_ptr< DirectiveInfo > info) override |
void | onDeregistered () override |
avsCommon::avs::DirectiveHandlerConfiguration | getConfiguration () const override |
ChannelObserverInterface Functions | |
void | onFocusChanged (avsCommon::avs::FocusState newFocus, avsCommon::avs::MixingBehavior behavior) override |
DialogUXStateObserverInterface Functions | |
void | onDialogUXStateChanged (avsCommon::sdkInterfaces::DialogUXStateObserverInterface::DialogUXState newState) override |
CapabilityConfigurationInterface Functions | |
std::unordered_set< std::shared_ptr< avsCommon::avs::CapabilityConfiguration > > | getCapabilityConfigurations () override |
LocaleAssetsObserverInterface Functions | |
void | onLocaleAssetsChanged () override |
InternetConnectionObserverInterface Functions | |
void | onConnectionStatusChanged (bool connected) override |
![]() | |
virtual | ~CapabilityAgent ()=default |
void | preHandleDirective (std::shared_ptr< AVSDirective > directive, std::unique_ptr< sdkInterfaces::DirectiveHandlerResultInterface > result) override final |
bool | handleDirective (const std::string &messageId) override final |
void | cancelDirective (const std::string &messageId) override final |
![]() | |
virtual | ~DirectiveHandlerInterface ()=default |
virtual void | preHandleDirective (std::shared_ptr< avsCommon::avs::AVSDirective > directive, std::unique_ptr< DirectiveHandlerResultInterface > result)=0 |
![]() | |
virtual | ~ChannelObserverInterface ()=default |
![]() | |
virtual | ~StateProviderInterface ()=default |
virtual void | provideState (const avs::NamespaceAndName &stateProviderName, const ContextRequestToken stateRequestToken) |
virtual void | provideState (const avs::CapabilityTag &stateProviderName, const ContextRequestToken stateRequestToken) |
virtual bool | canStateBeRetrieved () |
virtual bool | hasReportableStateProperties () |
virtual bool | shouldQueryState () |
![]() | |
virtual | ~ContextRequesterInterface ()=default |
virtual void | onContextAvailable (const endpoints::EndpointIdentifier &endpointId, const avs::AVSContext &endpointContext, ContextRequestToken requestToken) |
virtual void | onContextFailure (const ContextRequestError error, ContextRequestToken token) |
![]() | |
virtual | ~CapabilityConfigurationInterface ()=default |
![]() | |
virtual | ~LocaleAssetsObserverInterface ()=default |
![]() | |
virtual | ~DialogUXStateObserverInterface ()=default |
![]() | |
virtual | ~InternetConnectionObserverInterface ()=default |
![]() | |
RequiresShutdown (const std::string &name) | |
virtual | ~RequiresShutdown () |
Destructor. More... | |
const std::string & | name () const |
void | shutdown () |
bool | isShutdown () const |
Static Public Attributes | |
static constexpr const char * | KEYWORD_TEXT_STOP = "STOP" |
A special keyword sent by supported wakeword engines for "Alexa, Stop". More... | |
static const auto | INVALID_INDEX = std::numeric_limits<avsCommon::avs::AudioInputStream::Index>::max() |
A reserved Index value which is considered invalid. More... | |
Additional Inherited Members | |
![]() | |
CapabilityAgent (const std::string &nameSpace, std::shared_ptr< sdkInterfaces::ExceptionEncounteredSenderInterface > exceptionEncounteredSender) | |
virtual std::shared_ptr< DirectiveInfo > | createDirectiveInfo (std::shared_ptr< AVSDirective > directive, std::unique_ptr< sdkInterfaces::DirectiveHandlerResultInterface > result) |
void | removeDirective (const std::string &messageId) |
void | sendExceptionEncounteredAndReportFailed (std::shared_ptr< DirectiveInfo > info, const std::string &message, avsCommon::avs::ExceptionErrorType type=avsCommon::avs::ExceptionErrorType::INTERNAL_ERROR) |
const std::pair< std::string, std::string > | buildJsonEventString (const std::string &eventName, const std::string &dialogRequestIdString="", const std::string &payload="{}", const std::string &context="") const |
![]() | |
const std::string | m_namespace |
The namespace of the capability agent. More... | |
std::shared_ptr< sdkInterfaces::ExceptionEncounteredSenderInterface > | m_exceptionEncounteredSender |
Object to use to send exceptionEncountered messages. More... | |
This class implements a SpeechRecognizer
capability agent.
AudioInputProcessor
is a top-level component which should be instantiated in application code, and connected up to other interfaces in the Alexa Client SDK. Interfaces which are used directly by the AudioInputProcessor are passed directly to its create()
function. To start sending an audio event, application code should call one of the recognize()
functions. To stop sending audio, application code should call the stopCapture()
function. Application code can also register objects which implement the ObserverInterface
to receive notifications when the AudioInputProcessor
state changes.
using alexaClientSDK::capabilityAgents::aip::AudioInputProcessor::EncodingFormatRequest = std:: map<std::string, std::pair<avsCommon::utils::AudioFormat::Encoding, avsCommon::utils::AudioFormat::Encoding> > |
Request to configure AudioInputProcessor
to provide multiple audio streams for a single Recognize event. Key in the map is resolve key, which will be used by caller to resolve the unresolved MessageRequest
. Each resolve key correlates to a pair of encoding formats. The first format is the preferred format, and the second one is the fallback format which is used if the preferred one is not supported.
using alexaClientSDK::capabilityAgents::aip::AudioInputProcessor::EncodingFormatResponse = std::map<std::string, avsCommon::utils::AudioFormat::Encoding> |
Response to caller when AIP receives EncodingFormatRequest. Key in the map is resolve key from the request, and corresponding value is the confirmed encoding format that AIP will provide for this resolve key.
using alexaClientSDK::capabilityAgents::aip::AudioInputProcessor::ExpectSpeechTimeoutHandler = avsCommon::sdkInterfaces::ExpectSpeechTimeoutHandlerInterface |
This function allows applications to tell the AudioInputProcessor
that the ExpectSpeech
directive's timeout will be handled externally and stops the AudioInputProcessor
from starting an internal timer to handle it.
timeout | The timeout of the ExpectSpeech directive. |
expectSpeechTimedOut | An std::function that applications may call if the timeout expires. This results in an ExpectSpeechTimedOut event being sent to AVS if no recognize() call is made prior to the timeout expiring. This function will return a future which is true if called in the correct state and an ExpectSpeechTimedOut event was sent successfully, or false otherwise. |
true
if the ExpectSpeech
directive's timeout will be handled externally and should not be handled via an internal timer owned by the AudioInputProcessor
.AudioInputProcessorObserverInterface's
onStateChanged() method to notify of a state change to EXPECTING_SPEECH
. expectSpeechTimedOut
parameter is no longer valid. using alexaClientSDK::capabilityAgents::aip::AudioInputProcessor::ObserverInterface = avsCommon::sdkInterfaces::AudioInputProcessorObserverInterface |
Alias to the AudioInputProcessorObserverInterface
for brevity.
void alexaClientSDK::capabilityAgents::aip::AudioInputProcessor::addObserver | ( | std::shared_ptr< ObserverInterface > | observer | ) |
Adds an observer to be notified of AudioInputProcessor state changes.
observer | The observer object to add. |
|
overridevirtual |
Cancel an ongoing preHandleDirective()
or handleDirective()
operation for the AVSDirective
in info. Once this has been called the CapabilityAgent
should not expect to receive further calls regarding this directive.
AVSDirectives
.info | The DirectiveInfo instance for the AVSDirective to process. |
Implements alexaClientSDK::avsCommon::avs::CapabilityAgent.
|
static |
Creates a new AudioInputProcessor
instance.
directiveSequencer | The Directive Sequencer to register with for receiving directives. |
messageSender | The object to use for sending events. |
contextManager | The AVS Context manager used to generate system context for events. |
focusManager | The channel focus manager used to manage usage of the dialog channel. |
dialogUXStateAggregator | The dialog state aggregator which tracks UX states related to dialog. |
exceptionEncounteredSender | The object to use for sending AVS Exception messages. |
userInactivityNotifier | The object to use for resetting user inactivity. |
systemSoundPlayer | The instance of the system sound player. |
assetsManager | Responsible for retrieving and changing the wake words and locale. |
wakeWordConfirmation | The wake word confirmation setting. |
speechConfirmation | The end of speech confirmation setting. |
capabilityChangeNotifier | The object with which to notify observers of AudioInputProcessor capability configurations change. |
wakeWordsSetting | The setting that represents the enabled wake words. This parameter is required if this device supports wake words. |
audioEncoder | The Audio Encoder used to encode audio inputs. This parameter is optional and defaults to nullptr, which disables the encoding feature. |
defaultAudioProvider | A default avsCommon::AudioProvider to use for ExpectSpeech if the previous provider is not readable (avsCommon::AudioProvider::alwaysReadable ). This parameter is optional and defaults to an invalid avsCommon::AudioProvider . |
powerResourceManager | Power Resource Manager. |
metricRecorder | The metric recorder. |
expectSpeechTimeoutHandler | An optional interface that applications may provide to specify external handling of the ExpectSpeech directive's timeout. If provided, this function must remain valid for the lifetime of the AudioInputProcessor . |
std::shared_ptr
to the new AudioInputProcessor
instance.
|
overridevirtual |
Returns the configurations of the capability interfaces being implemented.
Implements alexaClientSDK::avsCommon::sdkInterfaces::CapabilityConfigurationInterface.
|
overridevirtual |
Returns the configuration of the directive handler.
The configuration consists of multiple directive routing rules and their respective blocking policy. The directives will be matched from the most specific rule (with all fields defined) to the least specific rule (which only matches the directive endpointId).
avs::DirectiveHandlerConfiguration
of the handler. Implements alexaClientSDK::avsCommon::sdkInterfaces::DirectiveHandlerInterface.
EncodingFormatResponse alexaClientSDK::capabilityAgents::aip::AudioInputProcessor::getEncodingAudioFormats | ( | ) | const |
Function to get encoding audio formats.
|
static |
Gets the speech confirmation events metadata.
|
static |
Gets the wake word confirmation events metadata.
|
static |
Gets the wake words events metadata.
|
overridevirtual |
Handle the action specified by the AVSDirective
in info
. The handling of subsequent directives with the same DialogRequestId
may be blocked until the DirectiveHandler
calls the setSucceeded()
method of the DirectiveHandlingResult
present in info
. If handling of this directive fails setFailed()
should be called to indicate a failure.
AVSDirectives
.info | The DirectiveInfo instance for the AVSDirective to process. |
Implements alexaClientSDK::avsCommon::avs::CapabilityAgent.
|
overridevirtual |
Handle the action specified by the AVSDirective
. Once this has been called the DirectiveHandler
should not expect to receive further calls regarding this directive.
AVSDirectives
. ExceptionEncountered
message should be sent to AVS.directive | The directive to handle. |
Implements alexaClientSDK::avsCommon::sdkInterfaces::DirectiveHandlerInterface.
|
overridevirtual |
Take necessary actions as a result of an internet connection change.
connected | Whether or not we are currently connected to the internet. |
Implements alexaClientSDK::avsCommon::sdkInterfaces::InternetConnectionObserverInterface.
|
overridevirtual |
This is called by the ContextManager once the context is ready and available.
ContextRequester
should perform minimum processing and return quickly. Otherwise it will block the processing of updating the state of other ContextProviders
.jsonContext | Context information. Context provided is of the format {"context": [{...}, {...}]} |
Reimplemented from alexaClientSDK::avsCommon::sdkInterfaces::ContextRequesterInterface.
|
overridevirtual |
The contextManager calls this if it is unable to process a getContext
request successfully.
ContextRequester
should perform minimum processing and return quickly. Otherwise it will block the processing of updating the state of other ContextProviders
.error | The reason why the getContext request failed. |
Reimplemented from alexaClientSDK::avsCommon::sdkInterfaces::ContextRequesterInterface.
|
overridevirtual |
Notification that this handler has been de-registered and will not receive any more calls.
Reimplemented from alexaClientSDK::avsCommon::avs::CapabilityAgent.
|
overridevirtual |
This function is called whenever the AVS UX dialog state of the system changes. This function will block processing of other state changes, so any implementation of this should return quickly.
newState | The new dialog specific AVS UX state. |
Implements alexaClientSDK::avsCommon::sdkInterfaces::DialogUXStateObserverInterface.
|
overridevirtual |
|
overridevirtual |
Used to notify the observer of the Channel of focus changes. Once called, the client should make a user observable change only and return immediately. Any additional work that needs to be done should be done on a separate thread or after returning. "User observable change" here refers to events that the end user of the product can visibly see or hear. For example, Alexa speech or music playing would be examples of user observable changes. Other work, such as database storing, logging, or communicating via network should be done on a different thread. Not doing so could result in delays for other clients trying to access the Channel.
newFocus | The new Focus of the channel. |
behavior | The mixingBehavior for the ChannelObserver to take as per the interrupt model |
Reimplemented from alexaClientSDK::avsCommon::avs::CapabilityAgent.
|
overridevirtual |
Used to notify the observer of locale assets (wakewords, locales) update.
Implements alexaClientSDK::avsCommon::sdkInterfaces::LocaleAssetsObserverInterface.
|
overridevirtual |
Called when the Response code is received.
status | The status of the response that was received. |
Reimplemented from alexaClientSDK::avsCommon::sdkInterfaces::MessageRequestObserverInterface.
|
overridevirtual |
|
overridevirtual |
Notification that a directive has arrived. This notification gives the DirectiveHandler a chance to prepare for handling of an AVSDirective
. If an error occurs during the pre-Handling phase and that error should cancel the handling of subsequent AVSDirectives
with the same DialogRequestId
, the DirectiveHandler
should call the setFailed
method on the result
instance passed in to this call.
AVSDirectives
.info | The DirectiveInfo instance for the AVSDirective to process. |
Implements alexaClientSDK::avsCommon::avs::CapabilityAgent.
std::future<bool> alexaClientSDK::capabilityAgents::aip::AudioInputProcessor::recognize | ( | AudioProvider | audioProvider, |
Initiator | initiator, | ||
std::chrono::steady_clock::time_point | startOfSpeechTimestamp = std::chrono::steady_clock::now() , |
||
avsCommon::avs::AudioInputStream::Index | begin = INVALID_INDEX , |
||
avsCommon::avs::AudioInputStream::Index | keywordEnd = INVALID_INDEX , |
||
std::string | keyword = "" , |
||
std::shared_ptr< const std::vector< char >> | KWDMetadata = nullptr , |
||
const std::string & | initiatorToken = "" |
||
) |
This function asks the AudioInputProcessor
to send a Recognize Event to AVS and start streaming from audioProvider
, which transitions it to the RECOGNIZING
state. This function can be called in any state except BUSY
, however the flags in AudioProvider
will dictate whether the call is allowed to override an ongoing Recognize Event. If the flags do not allow an override, no event will be sent, no state change will occur, and the function will fail.
A special case is that the function will also fail if the keyword passed in is equal to KEYWORD_TEXT_STOP
. This check is case insensitive.
MessageSenderInterface
to start streaming if the start index or any subsequent data has already expired from the buffer. In addition, it is assumed that MessageSenderInterface
will stop streaming immediately if it detects an overrun, and notify AIP of this condition (through a MessageRequest
callback). These requirements ensure that the begin
and keywordEnd
indices will remain accurate for the actual audio samples sent to AVS. False-wakeword detection in the cloud will be used when all of the following requirements are met:
initiator == Initiator::WAKEWORD
begin != INVALID_INDEX
keywordEnd != INVALID_INDEX
!keyword.empty()
If all of the above requirements are met, audio streaming will start between 0 and 500ms before begin
, and the cloud will perform additional verification of the wakeword audio before proceeding to recognize the subsequent audio.
audioProvider | The AudioProvider to stream audio from. |
initiator | The type of interface that initiated this recognize event. |
startOfSpeechTimestamp | Moment in time when user started talking to Alexa. This parameter is optional and it is used to measure user perceived latency. The startOfSpeechTimestamp must include the wakeword duration if the audio stream is initiated by a wakeword, otherwise the latency calculation will not be correct. |
begin | The Index in audioProvider.stream where audio streaming should begin. This parameter is optional, and defaults to INVALID_INDEX . When this parameter is not specified, recognize() will stream audio starting at the time of the recognize() call. If the initiator is WAKEWORD , and this and keywordEnd are specified, streaming will begin between 0 and 500ms prior to the Index specified by this parameter to attempt false wakeword validation. |
keywordEnd | The Index in audioProvider.stream where the wakeword ends. This parameter is optional, and defaults to INVALID_INDEX . This parameter is ignored if initiator is not WAKEWORD . |
keyword | The text of the keyword which was recognized. This parameter is optional, and defaults to an empty string. This parameter is ignored if initiator is not WAKEWORD . The only value currently accepted by AVS for keyword is "ALEXA". See https://developer.amazon.com/public/solutions/alexa/alexa-voice-service/reference/context#recognizerstate |
KWDMetadata | Wake word engine metadata. |
initiatorToken | An optional opaque string associated with the interaction. |
true
if the Recognize Event was started successfully, else false
. void alexaClientSDK::capabilityAgents::aip::AudioInputProcessor::removeObserver | ( | std::shared_ptr< ObserverInterface > | observer | ) |
Removes an observer from the set of observers to be notified of AudioInputProcessor state changes.
removeObserver()
from ObserverInterface::onStateChanged()
will result in a deadlock.observer | The observer object to remove. |
EncodingFormatResponse alexaClientSDK::capabilityAgents::aip::AudioInputProcessor::requestEncodingAudioFormats | ( | const EncodingFormatRequest & | encodings | ) |
Function to request multiple audio streams from AudioInputProcessor
in a single Recognize event. This is an alternative API to setEncodingAudioFormat()
.
encodings | A map of resolveKey to a pair of encoding formats. Each resolveKey stands for an audio stream. The first encoding format is the requested format, and the second one is the backup format if the requested one isn't supported by AudioInputProcessor . |
AudioInputProcessor
will check backup format. If neither of them is supported, the corresponding resolve key will be removed from result. std::future<void> alexaClientSDK::capabilityAgents::aip::AudioInputProcessor::resetState | ( | ) |
This function forces the AudioInputProcessor
back to the IDLE
state. This function can be called in any state, and will end any Event which is currently in progress.
AudioInputProcessor
is back to the IDLE
state. bool alexaClientSDK::capabilityAgents::aip::AudioInputProcessor::setEncodingAudioFormat | ( | avsCommon::utils::AudioFormat::Encoding | encoding | ) |
Set encoding for the audio format. The new encoding will be used for future utterances. Any audio stream already in progress will not be affected. This is an alternative API to requestEncodingAudioFormats()
, but will configure AudioInputProcessor
to only produce one audio stream.
encoding | The encoding format to use. |
false
on failure to set the encoding. std::future<bool> alexaClientSDK::capabilityAgents::aip::AudioInputProcessor::stopCapture | ( | ) |
This function asks the AudioInputProcessor
to stop streaming audio and end an ongoing Recognize Event, which transitions it to the BUSY
state. This function can only be called in the RECOGNIZING
state; calling it while in another state will fail.
true
if called in the correct state and a Recognize Event's audio streaming was stopped successfully, else false
.
|
static |
A reserved Index
value which is considered invalid.
|
static |
A special keyword sent by supported wakeword engines for "Alexa, Stop".
AlexaClientSDK 3.0.0 - Copyright 2016-2022 Amazon.com, Inc. or its affiliates. All Rights Reserved. Licensed under the Apache License, Version 2.0