You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

600 lines
23 KiB

/***************************************************** vim:set ts=4 sw=4 sts=4:
  Speaker class.

  This class is in charge of getting the messages, warnings and text from
  the queue and calling the plugin functions to actually speak the texts.
  -------------------
  Copyright:
  (C) 2002-2003 by José Pablo Ezequiel "Pupeno" Fernández
  (C) 2003-2004 by Olaf Schmidt
  (C) 2004 by Gary Cramblitt
  -------------------
  Original author: José Pablo Ezequiel "Pupeno" Fernández
 ******************************************************************************/

/******************************************************************************
 *                                                                            *
 *    This program is free software; you can redistribute it and/or modify    *
 *    it under the terms of the GNU General Public License as published by    *
 *    the Free Software Foundation; either version 2 of the License.          *
 *                                                                            *
 ******************************************************************************/

#ifndef _SPEAKER_H_
#define _SPEAKER_H_
// Qt includes.
#include <tqobject.h>
#include <tqvaluevector.h>
#include <tqevent.h>
// KTTSD includes.
#include <speechdata.h>
#include <pluginproc.h>
#include <stretcher.h>
#include <talkercode.h>
#include <ssmlconvert.h>
// Forward declarations of types that this header only refers to by pointer.
// The timer is declared as TQTimer because that is the type of the m_timer
// member of Speaker.
class Player;
class TQTimer;
class TalkerMgr;
/**
 * Type of utterance.
 */
enum uttType
{
    utText,                      /**< Text */
    utInterruptMsg,              /**< Interruption text message */
    utInterruptSnd,              /**< Interruption sound file */
    utResumeMsg,                 /**< Resume text message */
    utResumeSnd,                 /**< Resume sound file */
    utMessage,                   /**< Message */
    utWarning,                   /**< Warning */
    utScreenReader,              /**< Screen Reader Output */
    utStartOfJob,                /**< Start-of-job */
    utEndOfJob                   /**< End-of-job */
};
/**
 * Processing state of an utterance.
 */
enum uttState
{
    usNone,                      /**< Null state. Brand new utterance. */
    usWaitingTransform,          /**< Waiting to be transformed (XSLT) */
    usTransforming,              /**< Transforming the utterance (XSLT). */
    usWaitingSay,                /**< Waiting to start synthesis. */
    usWaitingSynth,              /**< Waiting to be synthesized and audibilized. */
    usWaitingSignal,             /**< Waiting to emit a textStarted or textFinished signal. */
    usSaying,                    /**< Plugin is synthesizing and audibilizing. */
    usSynthing,                  /**< Plugin is synthesizing only. */
    usSynthed,                   /**< Plugin has finished synthesizing.  Ready for stretch. */
    usStretching,                /**< Adjusting speed. */
    usStretched,                 /**< Speed adjustment finished.  Ready for playback. */
    usPlaying,                   /**< Playing on Audio Player. */
    usPaused,                    /**< Paused on Audio Player due to user action. */
    usPreempted,                 /**< Paused on Audio Player due to Screen Reader Output. */
    usFinished                   /**< Ready for deletion. */
};
* Structure containing an utterance being synthesized or audibilized.
struct Utt{
mlText* sentence; /* The text, talker, appId, and sequence num. */
uttType utType; /* The type of utterance (text, msg, screen reader) */
bool isSSML; /* True if the utterance contains SSML markup. */
uttState state; /* Processing state of the utterance. */
SSMLConvert* transformer; /* XSLT transformer. */
PlugInProc* plugin; /* The plugin that synthesizes the utterance. */
Stretcher* audioStretcher; /* Audio stretcher object. Adjusts speed. */
TQString audioUrl; /* Filename containing synthesized audio. Null if
plugin has not yet synthesized the utterance, or if
plugin does not support synthesis. */
Player* audioPlayer; /* The audio player audibilizing the utterance. Null
if not currently audibilizing or if plugin doesn't
support synthesis. */
* Iterator for queue of utterances.
typedef TQValueVector<Utt>::iterator uttIterator;
/**
 * Interval of the timer used to check whether audio playback has finished.
 * NOTE(review): the unit is presumably milliseconds -- confirm against the
 * call that starts the timer.
 * Declared static to make the (already implicit) internal linkage explicit.
 */
static const int timerInterval = 500;
* This class is in charge of getting the messages, warnings and text from
* the queue and call the plug ins function to actually speak the texts.
class Speaker : public QObject{
* Constructor
* Calls load plug ins
Speaker(SpeechData* speechData, TalkerMgr* talkerMgr,
TQObject *parent = 0, const char *name = 0);
* Destructor
* Tells the thread to exit
void requestExit();
* Main processing loop. Dequeues utterances and sends them to the
* plugins and/or Audio Player.
void doUtterances();
* Determine if kttsd is currently speaking any text jobs.
* @return True if currently speaking any text jobs.
bool isSpeakingText();
* Get the job number of the current text job.
* @return Job number of the current text job. 0 if no jobs.
* Note that the current job may not be speaking. See @ref isSpeakingText.
* @see getTextJobState.
* @see isSpeakingText
uint getCurrentTextJob();
* Remove a text job from the queue.
* @param jobNum Job number of the text job.
* The job is deleted from the queue and the @ref textRemoved signal is emitted.
* If there is another job in the text queue, and it is marked speakable,
* that job begins speaking.
void removeText(const uint jobNum);
* Start a text job at the beginning.
* @param jobNum Job number of the text job.
* Rewinds the job to the beginning.
* The job is marked speakable.
* If there are other speakable jobs preceeding this one in the queue,
* those jobs continue speaking and when finished, this job will begin speaking.
* If there are no other speakable jobs preceeding this one, it begins speaking.
* The @ref textStarted signal is emitted when the text job begins speaking.
* When all the sentences of the job have been spoken, the job is marked for deletion from
* the text queue and the @ref textFinished signal is emitted.
void startText(const uint jobNum);
* Stop a text job and rewind to the beginning.
* @param jobNum Job number of the text job.
* The job is marked not speakable and will not be speakable until @ref startText or @ref resumeText
* is called.
* If there are speaking jobs preceeding this one in the queue, they continue speaking.
* If the job is currently speaking, the @ref textStopped signal is emitted and the job stops speaking.
* Depending upon the speech engine and plugin used, speeking may not stop immediately
* (it might finish the current sentence).
void stopText(const uint jobNum);
* Pause a text job.
* @param jobNum Job number of the text job.
* The job is marked as paused and will not be speakable until @ref resumeText or
* @ref startText is called.
* If there are speaking jobs preceeding this one in the queue, they continue speaking.
* If the job is currently speaking, the @ref textPaused signal is emitted and the job stops speaking.
* Depending upon the speech engine and plugin used, speeking may not stop immediately
* (it might finish the current sentence).
* @see resumeText
void pauseText(const uint jobNum);
* Start or resume a text job where it was paused.
* @param jobNum Job number of the text job.
* The job is marked speakable.
* If the job is currently speaking, or is waiting to be spoken (speakable
* state), the resumeText() call is ignored.
* If the job is currently queued, or is finished, it is the same as calling
* @ref startText .
* If there are speaking jobs preceeding this one in the queue, those jobs continue speaking and,
* when finished this job will begin speaking where it left off.
* The @ref textResumed signal is emitted when the job resumes.
* @see pauseText
void resumeText(const uint jobNum);
* Move a text job down in the queue so that it is spoken later.
* @param jobNum Job number of the text job.
* If the job is currently speaking, it is paused.
* If the next job in the queue is speakable, it begins speaking.
void moveTextLater(const uint jobNum);
* Jump to the first sentence of a specified part of a text job.
* @param partNum Part number of the part to jump to. Parts are numbered starting at 1.
* @param jobNum Job number of the text job.
* @return Part number of the part actually jumped to.
* If partNum is greater than the number of parts in the job, jumps to last part.
* If partNum is 0, does nothing and returns the current part number.
* If no such job, does nothing and returns 0.
* Does not affect the current speaking/not-speaking state of the job.
int jumpToTextPart(const int partNum, const uint jobNum);
* Advance or rewind N sentences in a text job.
* @param n Number of sentences to advance (positive) or rewind (negative)
* in the job.
* @param jobNum Job number of the text job.
* @return Sequence number of the sentence actually moved to.
* Sequence numbers are numbered starting at 1.
* If no such job, does nothing and returns 0.
* If n is zero, returns the current sequence number of the job.
* Does not affect the current speaking/not-speaking state of the job.
uint moveRelTextSentence(const int n, const uint jobNum);
* Emitted whenever reading a text was started or resumed
void readingStarted();
* Emitted whenever reading a text was finished,
* or paused, or stopped before it was finished
void readingStopped();
* Emitted whenever a message or warning interrupts reading a text
void readingInterrupted();
* Emitted whenever reading a text is resumed after it was interrupted
* Note: In function resumeText, readingStarted is called instead
void readingResumed();
/* The following signals correspond to the signals in the KSpeech interface. */
* This signal is emitted when the speech engine/plugin encounters a marker in the text.
* @param appId DCOP application ID of the application that queued the text.
* @param markerName The name of the marker seen.
* @see markers
void markerSeen(const TQCString& appId, const TQString& markerName);
* This signal is emitted whenever a sentence begins speaking.
* @param appId DCOP application ID of the application that queued the text.
* @param jobNum Job number of the text job.
* @param seq Sequence number of the text.
void sentenceStarted(TQString text, TQString language, const TQCString& appId,
const uint jobNum, const uint seq);
* This signal is emitted when a sentence has finished speaking.
* @param appId DCOP application ID of the application that queued the text.
* @param jobNum Job number of the text job.
* @param seq Sequence number of the text.
void sentenceFinished(const TQCString& appId, const uint jobNum, const uint seq);
* This signal is emitted whenever speaking of a text job begins.
* @param appId The DCOP senderId of the application that created the job. NULL if kttsd.
* @param jobNum Job number of the text job.
void textStarted(const TQCString& appId, const uint jobNum);
* This signal is emitted whenever a text job is finished. The job has
* been marked for deletion from the queue and will be deleted when another
* job reaches the Finished state. (Only one job in the text queue may be
* in state Finished at one time.) If @ref startText or @ref resumeText is
* called before the job is deleted, it will remain in the queue for speaking.
* @param appId The DCOP senderId of the application that created the job.
* @param jobNum Job number of the text job.
void textFinished(const TQCString& appId, const uint jobNum);
* This signal is emitted whenever a speaking text job stops speaking.
* @param appId The DCOP senderId of the application that created the job.
* @param jobNum Job number of the text job.
void textStopped(const TQCString& appId, const uint jobNum);
* This signal is emitted whenever a speaking text job is paused.
* @param appId The DCOP senderId of the application that created the job.
* @param jobNum Job number of the text job.
void textPaused(const TQCString& appId, const uint jobNum);
* This signal is emitted when a text job, that was previously paused, resumes speaking.
* @param appId The DCOP senderId of the application that created the job.
* @param jobNum Job number of the text job.
void textResumed(const TQCString& appId, const uint jobNum);
* Processes events posted by ThreadedPlugIns.
virtual bool event ( TQEvent * e );
private slots:
* Received from PlugIn objects when they finish asynchronous synthesis.
void slotSynthFinished();
* Received from PlugIn objects when they finish asynchronous synthesis
* and audibilizing.
void slotSayFinished();
* Received from PlugIn objects when they asynchronously stopText.
void slotStopped();
* Received from audio stretcher when stretching (speed adjustment) is finished.
void slotStretchFinished();
* Received from transformer (SSMLConvert) when transforming is finished.
void slotTransformFinished();
/** Received from PlugIn object when they encounter an error.
* @param keepGoing True if the plugin can continue processing.
* False if the plugin cannot continue, for example,
* the speech engine could not be started.
* @param msg Error message.
void slotError(bool keepGoing, const TQString &msg);
* Received from Timer when it fires.
* Check audio player to see if it is finished.
void slotTimeout();
* Converts an utterance state enumerator to a displayable string.
* @param state Utterance state.
* @return Displayable string for utterance state.
TQString uttStateToStr(uttState state);
* Converts an utterance type enumerator to a displayable string.
* @param utType Utterance type.
* @return Displayable string for utterance type.
TQString uttTypeToStr(uttType utType);
* Converts a plugin state enumerator to a displayable string.
* @param state Plugin state.
* @return Displayable string for plugin state.
TQString pluginStateToStr(pluginState state);
* Converts a job state enumerator to a displayable string.
* @param state Job state.
* @return Displayable string for job state.
TQString jobStateToStr(int state);
* Determines whether the given text is SSML markup.
bool isSsml(const TQString &text);
* Determines the initial state of an utterance. If the utterance contains
* SSML, the state is set to usWaitingTransform. Otherwise, if the plugin
* supports async synthesis, sets to usWaitingSynth, otherwise usWaitingSay.
* If an utterance has already been transformed, usWaitingTransform is
* skipped to either usWaitingSynth or usWaitingSay.
* @param utt The utterance.
void setInitialUtteranceState(Utt &utt);
* Returns true if the given job and sequence number is already in the utterance queue.
bool isInUtteranceQueue(uint jobNum, uint seqNum);
* Gets the next utterance to be spoken from speechdata and adds it to the queue.
* @return True if one or more utterances were added to the queue.
* Checks for waiting ScreenReaderOutput, Warnings, Messages, or Text,
* in that order.
* If Warning or Message and interruption messages have been configured,
* adds those to the queue as well.
* Determines which plugin should be used for the utterance.
bool getNextUtterance();
* Given an iterator pointing to the m_uttQueue, deletes the utterance
* from the queue. If the utterance is currently being processed by a
* plugin or the Audio Player, halts that operation and deletes Audio Player.
* Also takes care of deleting temporary audio file.
* @param it Iterator pointer to m_uttQueue.
* @return Iterator pointing to the next utterance in the
* queue, or m_uttQueue.end().
uttIterator deleteUtterance(uttIterator it);
* Given an iterator pointing to the m_uttQueue, starts playing audio if
* 1) An audio file is ready to be played, and
* 2) It is not already playing.
* If another audio player is already playing, pauses it before starting
* the new audio player.
* @param it Iterator pointer to m_uttQueue.
* @return True if an utterance began playing or resumed.
bool startPlayingUtterance(uttIterator it);
* Delete any utterances in the queue with this jobNum.
* @param jobNum The Job Number of the utterance(s) to delete.
* If currently processing any deleted utterances, stop them.
void deleteUtteranceByJobNum(const uint jobNum);
* Pause the utterance with this jobNum and if it is playing on the Audio Player,
* pause the Audio Player.
* @param jobNum The Job Number of the utterance to pause.
void pauseUtteranceByJobNum(const uint jobNum);
* Takes care of emitting reading interrupted/resumed and sentence started signals.
* Should be called just before audibilizing an utterance.
* @param it Iterator pointer to m_uttQueue.
void prePlaySignals(uttIterator it);
* Takes care of emitting sentenceFinished signal.
* Should be called immediately after an utterance has completed playback.
* @param it Iterator pointer to m_uttQueue.
void postPlaySignals(uttIterator it);
* Constructs a temporary filename for plugins to use as a suggested filename
* for synthesis to write to.
* @return Full pathname of suggested file.
TQString makeSuggestedFilename();
* Creates and returns a player object based on user option.
Player* createPlayerObject();
* SpeechData local pointer
SpeechData* m_speechData;
* TalkerMgr local pointer.
TalkerMgr* m_talkerMgr;
* True if the speaker was requested to exit.
volatile bool m_exitRequested;
* Queue of utterances we are currently processing.
TQValueVector<Utt> m_uttQueue;
* True when text job reading has been interrupted.
bool m_textInterrupted;
* Used to prevent doUtterances from prematurely exiting.
bool m_again;
* Which audio player to use.
* 0 = aRts
* 1 = gstreamer
* 2 = ALSA
int m_playerOption;
* Audio stretch factor (Speed).
float m_audioStretchFactor;
* GStreamer sink name to use, or ALSA PCM device name.
TQString m_sinkName;
* Timer for monitoring audio player.
TQTimer* m_timer;
* Current Text job being processed.
uint m_currentJobNum;
* Job Number, appId, and sequence number of the last text sentence queued.
uint m_lastJobNum;
TQCString m_lastAppId;
uint m_lastSeq;
* Some parameters used by ALSA plugin.
* Size of buffer interrupt period (in frames)
* Number of periods in buffer.
uint m_periodSize;
uint m_periods;
* Debug level in players.
uint m_playerDebugLevel;
#endif // _SPEAKER_H_