tdeaccessibility/kttsd/kttsd/speaker.h

/***************************************************** vim:set ts=4 sw=4 sts=4:
  Speaker class.

  This class is in charge of getting the messages, warnings and text from
  the queue and call the plug ins function to actually speak the texts.
  -------------------
  Copyright:
  (C) 2002-2003 by José Pablo Ezequiel "Pupeno" Fernández <pupeno@kde.org>
  (C) 2003-2004 by Olaf Schmidt <ojschmidt@kde.org>
  (C) 2004 by Gary Cramblitt <garycramblitt@comcast.net>
  -------------------
  Original author: José Pablo Ezequiel "Pupeno" Fernández
 ******************************************************************************/

/******************************************************************************
 *                                                                            *
 *    This program is free software; you can redistribute it and/or modify    *
 *    it under the terms of the GNU General Public License as published by    *
 *    the Free Software Foundation; either version 2 of the License.          *
 *                                                                            *
 ******************************************************************************/

#ifndef _SPEAKER_H_
#define _SPEAKER_H_

// TQt includes.
#include <tqobject.h>
#include <tqvaluevector.h>
#include <tqevent.h>

// KTTSD includes.
#include <speechdata.h>
#include <pluginproc.h>
#include <stretcher.h>
#include <talkercode.h>
#include <ssmlconvert.h>

class Player;
class TQTimer;
class TalkerMgr;

/**
* Type of utterance.
*/
enum uttType
{
    utText,                      /**< Text */
    utInterruptMsg,              /**< Interruption text message */
    utInterruptSnd,              /**< Interruption sound file */
    utResumeMsg,                 /**< Resume text message */
    utResumeSnd,                 /**< Resume sound file */
    utMessage,                   /**< Message */
    utWarning,                   /**< Warning */
    utScreenReader,              /**< Screen Reader Output */
    utStartOfJob,                /**< Start-of-job */
    utEndOfJob                   /**< End-of-job */
};

/**
* Processing state of an utterance.
*/
enum uttState
{
    usNone,                      /**< Null state. Brand new utterance. */
    usWaitingTransform,          /**< Waiting to be transformed (XSLT) */
    usTransforming,              /**< Transforming the utterance (XSLT). */
    usWaitingSay,                /**< Waiting to start synthesis. */
    usWaitingSynth,              /**< Waiting to be synthesized and audibilized. */
    usWaitingSignal,             /**< Waiting to emit a textStarted or textFinished signal. */
    usSaying,                    /**< Plugin is synthesizing and audibilizing. */
    usSynthing,                  /**< Plugin is synthesizing only. */
    usSynthed,                   /**< Plugin has finished synthesizing.  Ready for stretch. */
    usStretching,                /**< Adjusting speed. */
    usStretched,                 /**< Speed adjustment finished.  Ready for playback. */
    usPlaying,                   /**< Playing on Audio Player. */
    usPaused,                    /**< Paused on Audio Player due to user action. */
    usPreempted,                 /**< Paused on Audio Player due to Screen Reader Output. */
    usFinished                   /**< Ready for deletion. */
};

/**
* Structure containing an utterance being synthesized or audibilized.
*/
struct Utt{
    mlText* sentence;            /* The text, talker, appId, and sequence num. */
    uttType utType;              /* The type of utterance (text, msg, screen reader) */
    bool isSSML;                 /* True if the utterance contains SSML markup. */
    uttState state;              /* Processing state of the utterance. */
    SSMLConvert* transformer;    /* XSLT transformer. */
    PlugInProc* plugin;          /* The plugin that synthesizes the utterance. */
    Stretcher* audioStretcher;   /* Audio stretcher object.  Adjusts speed. */
    TQString audioUrl;            /* Filename containing synthesized audio.  Null if
                                    plugin has not yet synthesized the utterance, or if
                                    plugin does not support synthesis. */
    Player* audioPlayer;         /* The audio player audibilizing the utterance.  Null
                                    if not currently audibilizing or if plugin doesn't
                                    support synthesis. */
};

/**
* Iterator for queue of utterances.
*/
typedef TQValueVector<Utt>::iterator uttIterator;

// Timer interval for checking whether audio playback is finished.
const int timerInterval = 500;

/**
 * This class is in charge of getting the messages, warnings and text from
 * the queue and call the plug ins function to actually speak the texts.
 */
class Speaker : public TQObject{
    Q_OBJECT


    public:
        /**
         * Constructor
         * Calls load plug ins
         */
        Speaker(SpeechData* speechData, TalkerMgr* talkerMgr,
                TQObject *parent = 0, const char *name = 0);

        /**
         * Destructor
         */
        ~Speaker();

        /**
         * Tells the thread to exit
         */
        void requestExit();

        /**
        * Main processing loop.  Dequeues utterances and sends them to the
        * plugins and/or Audio Player.
        */
        void doUtterances();

        /**
        * Determine if kttsd is currently speaking any text jobs.
        * @return               True if currently speaking any text jobs.
        */
        bool isSpeakingText();

        /**
        * Get the job number of the current text job.
        * @return               Job number of the current text job. 0 if no jobs.
        *
        * Note that the current job may not be speaking. See @ref isSpeakingText.
        * @see getTextJobState.
        * @see isSpeakingText
        */
        uint getCurrentTextJob();

        /**
        * Remove a text job from the queue.
        * @param jobNum         Job number of the text job.
        *
        * The job is deleted from the queue and the @ref textRemoved signal is emitted.
        *
        * If there is another job in the text queue, and it is marked speakable,
        * that job begins speaking.
        */
        void removeText(const uint jobNum);

        /**
        * Start a text job at the beginning.
        * @param jobNum         Job number of the text job.
        *
        * Rewinds the job to the beginning.
        *
        * The job is marked speakable.
        * If there are other speakable jobs preceeding this one in the queue,
        * those jobs continue speaking and when finished, this job will begin speaking.
        * If there are no other speakable jobs preceeding this one, it begins speaking.
        *
        * The @ref textStarted signal is emitted when the text job begins speaking.
        * When all the sentences of the job have been spoken, the job is marked for deletion from
        * the text queue and the @ref textFinished signal is emitted.
        */
        void startText(const uint jobNum);

        /**
        * Stop a text job and rewind to the beginning.
        * @param jobNum         Job number of the text job.
        *
        * The job is marked not speakable and will not be speakable until @ref startText or @ref resumeText
        * is called.
        *
        * If there are speaking jobs preceeding this one in the queue, they continue speaking.
        * If the job is currently speaking, the @ref textStopped signal is emitted and the job stops speaking.
        * Depending upon the speech engine and plugin used, speeking may not stop immediately
        * (it might finish the current sentence).
        */
        void stopText(const uint jobNum);

        /**
        * Pause a text job.
        * @param jobNum         Job number of the text job.
        *
        * The job is marked as paused and will not be speakable until @ref resumeText or
        * @ref startText is called.
        *
        * If there are speaking jobs preceeding this one in the queue, they continue speaking.
        * If the job is currently speaking, the @ref textPaused signal is emitted and the job stops speaking.
        * Depending upon the speech engine and plugin used, speeking may not stop immediately
        * (it might finish the current sentence).
        * @see resumeText
        */
        void pauseText(const uint jobNum);

        /**
        * Start or resume a text job where it was paused.
        * @param jobNum         Job number of the text job.
        *
        * The job is marked speakable.
        *
        * If the job is currently speaking, or is waiting to be spoken (speakable
        * state), the resumeText() call is ignored.
        *
        * If the job is currently queued, or is finished, it is the same as calling
        * @ref startText .
        *
        * If there are speaking jobs preceeding this one in the queue, those jobs continue speaking and,
        * when finished this job will begin speaking where it left off.
        *
        * The @ref textResumed signal is emitted when the job resumes.
        * @see pauseText
        */
        void resumeText(const uint jobNum);

        /**
        * Move a text job down in the queue so that it is spoken later.
        * @param jobNum         Job number of the text job.
        *
        * If the job is currently speaking, it is paused.
        * If the next job in the queue is speakable, it begins speaking.
        */
        void moveTextLater(const uint jobNum);

        /**
        * Jump to the first sentence of a specified part of a text job.
        * @param partNum        Part number of the part to jump to.  Parts are numbered starting at 1.
        * @param jobNum         Job number of the text job.
        * @return               Part number of the part actually jumped to.
        *
        * If partNum is greater than the number of parts in the job, jumps to last part.
        * If partNum is 0, does nothing and returns the current part number.
        * If no such job, does nothing and returns 0.
        * Does not affect the current speaking/not-speaking state of the job.
        */
        int jumpToTextPart(const int partNum, const uint jobNum);

        /**
        * Advance or rewind N sentences in a text job.
        * @param n              Number of sentences to advance (positive) or rewind (negative)
        *                       in the job.
        * @param jobNum         Job number of the text job.
        * @return               Sequence number of the sentence actually moved to.
        *                       Sequence numbers are numbered starting at 1.
        *
        * If no such job, does nothing and returns 0.
        * If n is zero, returns the current sequence number of the job.
        * Does not affect the current speaking/not-speaking state of the job.
        */
        uint moveRelTextSentence(const int n, const uint jobNum);

    signals:
        /**
         * Emitted whenever reading a text was started or resumed
         */
        void readingStarted();

        /**
         * Emitted whenever reading a text was finished,
         * or paused, or stopped before it was finished
         */
        void readingStopped();

        /**
         * Emitted whenever a message or warning interrupts reading a text
         */
        void readingInterrupted();

        /**
         * Emitted whenever reading a text is resumed after it was interrupted
         * Note: In function resumeText, readingStarted is called instead
         */
        void readingResumed();

        /* The following signals correspond to the signals in the KSpeech interface. */

        /**
        * This signal is emitted when the speech engine/plugin encounters a marker in the text.
        * @param appId          DCOP application ID of the application that queued the text.
        * @param markerName     The name of the marker seen.
        * @see markers
        */
        void markerSeen(const TQCString& appId, const TQString& markerName);

        /**
        * This signal is emitted whenever a sentence begins speaking.
        * @param appId          DCOP application ID of the application that queued the text.
        * @param jobNum         Job number of the text job.
        * @param seq            Sequence number of the text.
        */
        void sentenceStarted(TQString text, TQString language, const TQCString& appId,
            const uint jobNum, const uint seq);

        /**
        * This signal is emitted when a sentence has finished speaking.
        * @param appId          DCOP application ID of the application that queued the text.
        * @param jobNum         Job number of the text job.
        * @param seq            Sequence number of the text.
        */
        void sentenceFinished(const TQCString& appId, const uint jobNum, const uint seq);

        /**
        * This signal is emitted whenever speaking of a text job begins.
        * @param appId          The DCOP senderId of the application that created the job.  NULL if kttsd.
        * @param jobNum         Job number of the text job.
        */
        void textStarted(const TQCString& appId, const uint jobNum);

        /**
        * This signal is emitted whenever a text job is finished.  The job has
        * been marked for deletion from the queue and will be deleted when another
        * job reaches the Finished state. (Only one job in the text queue may be
        * in state Finished at one time.)  If @ref startText or @ref resumeText is
        * called before the job is deleted, it will remain in the queue for speaking.
        * @param appId          The DCOP senderId of the application that created the job.
        * @param jobNum         Job number of the text job.
        */
        void textFinished(const TQCString& appId, const uint jobNum);

        /**
        * This signal is emitted whenever a speaking text job stops speaking.
        * @param appId          The DCOP senderId of the application that created the job.
        * @param jobNum         Job number of the text job.
        */
        void textStopped(const TQCString& appId, const uint jobNum);
        /**
        * This signal is emitted whenever a speaking text job is paused.
        * @param appId          The DCOP senderId of the application that created the job.
        * @param jobNum         Job number of the text job.
        */
        void textPaused(const TQCString& appId, const uint jobNum);
        /**
        * This signal is emitted when a text job, that was previously paused, resumes speaking.
        * @param appId          The DCOP senderId of the application that created the job.
        * @param jobNum         Job number of the text job.
        */
        void textResumed(const TQCString& appId, const uint jobNum);

    protected:
        /**
        * Processes events posted by ThreadedPlugIns.
        */
        virtual bool event ( TQEvent * e );

    private slots:
        /**
        * Received from PlugIn objects when they finish asynchronous synthesis.
        */
        void slotSynthFinished();
        /**
        * Received from PlugIn objects when they finish asynchronous synthesis
        * and audibilizing.
        */
        void slotSayFinished();
        /**
        * Received from PlugIn objects when they asynchronously stopText.
        */
        void slotStopped();
        /**
        * Received from audio stretcher when stretching (speed adjustment) is finished.
        */
        void slotStretchFinished();
        /**
        * Received from transformer (SSMLConvert) when transforming is finished.
        */
        void slotTransformFinished();
        /** Received from PlugIn object when they encounter an error.
         * @param keepGoing               True if the plugin can continue processing.
        *                                False if the plugin cannot continue, for example,
        *                                the speech engine could not be started.
        * @param msg                     Error message.
        */
        void slotError(bool keepGoing, const TQString &msg);
        /**
        * Received from Timer when it fires.
        * Check audio player to see if it is finished.
        */
        void slotTimeout();

    private:

        /**
        * Converts an utterance state enumerator to a displayable string.
        * @param state           Utterance state.
        * @return                Displayable string for utterance state.
        */
        TQString uttStateToStr(uttState state);

        /**
        * Converts an utterance type enumerator to a displayable string.
        * @param utType          Utterance type.
        * @return                Displayable string for utterance type.
        */
        TQString uttTypeToStr(uttType utType);

        /**
        * Converts a plugin state enumerator to a displayable string.
        * @param state           Plugin state.
        * @return                Displayable string for plugin state.
        */
        TQString pluginStateToStr(pluginState state);

        /**
        * Converts a job state enumerator to a displayable string.
        * @param state           Job state.
        * @return                Displayable string for job state.
        */
        TQString jobStateToStr(int state);

        /**
        * Determines whether the given text is SSML markup.
        */
        bool isSsml(const TQString &text);

        /**
        * Determines the initial state of an utterance.  If the utterance contains
        * SSML, the state is set to usWaitingTransform.  Otherwise, if the plugin
        * supports async synthesis, sets to usWaitingSynth, otherwise usWaitingSay.
        * If an utterance has already been transformed, usWaitingTransform is
        * skipped to either usWaitingSynth or usWaitingSay.
        * @param utt             The utterance.
        */
        void setInitialUtteranceState(Utt &utt);

        /**
        * Returns true if the given job and sequence number is already in the utterance queue.
        */
        bool isInUtteranceQueue(uint jobNum, uint seqNum);

        /**
        * Gets the next utterance to be spoken from speechdata and adds it to the queue.
        * @return                True if one or more utterances were added to the queue.
        *
        * Checks for waiting ScreenReaderOutput, Warnings, Messages, or Text,
        * in that order.
        * If Warning or Message and interruption messages have been configured,
        * adds those to the queue as well.
        * Determines which plugin should be used for the utterance.
        */
        bool getNextUtterance();

        /**
        * Given an iterator pointing to the m_uttQueue, deletes the utterance
        * from the queue.  If the utterance is currently being processed by a
        * plugin or the Audio Player, halts that operation and deletes Audio Player.
        * Also takes care of deleting temporary audio file.
        * @param it                      Iterator pointer to m_uttQueue.
        * @return                        Iterator pointing to the next utterance in the
        *                                queue, or m_uttQueue.end().
        */
        uttIterator deleteUtterance(uttIterator it);

        /**
        * Given an iterator pointing to the m_uttQueue, starts playing audio if
        *   1) An audio file is ready to be played, and
        *   2) It is not already playing.
        * If another audio player is already playing, pauses it before starting
        * the new audio player.
        * @param it                      Iterator pointer to m_uttQueue.
        * @return                        True if an utterance began playing or resumed.
        */
        bool startPlayingUtterance(uttIterator it);

        /**
        * Delete any utterances in the queue with this jobNum.
        * @param jobNum          The Job Number of the utterance(s) to delete.
        * If currently processing any deleted utterances, stop them.
        */
        void deleteUtteranceByJobNum(const uint jobNum);

        /**
        * Pause the utterance with this jobNum and if it is playing on the Audio Player,
        * pause the Audio Player.
        * @param jobNum          The Job Number of the utterance to pause.
        */
        void pauseUtteranceByJobNum(const uint jobNum);

        /**
        * Takes care of emitting reading interrupted/resumed and sentence started signals.
        * Should be called just before audibilizing an utterance.
        * @param it                      Iterator pointer to m_uttQueue.
        */
        void prePlaySignals(uttIterator it);

        /**
        * Takes care of emitting sentenceFinished signal.
        * Should be called immediately after an utterance has completed playback.
        * @param it                      Iterator pointer to m_uttQueue.
        */
        void postPlaySignals(uttIterator it);

        /**
        * Constructs a temporary filename for plugins to use as a suggested filename
        * for synthesis to write to.
        * @return                        Full pathname of suggested file.
        */
        TQString makeSuggestedFilename();

        /**
        * Creates and returns a player object based on user option.
        */
        Player* createPlayerObject();

        /**
         * SpeechData local pointer
         */
        SpeechData* m_speechData;

        /**
        * TalkerMgr local pointer.
        */
        TalkerMgr* m_talkerMgr;

        /**
        * True if the speaker was requested to exit.
        */
        volatile bool m_exitRequested;

        /**
        * Queue of utterances we are currently processing.
        */
        TQValueVector<Utt> m_uttQueue;

        /**
        * True when text job reading has been interrupted.
        */
        bool m_textInterrupted;

        /**
        * Used to prevent doUtterances from prematurely exiting.
        */
        bool m_again;

        /**
        * Which audio player to use.
        *  0 = aRts
        *  1 = gstreamer
        *  2 = ALSA
        */
        int m_playerOption;

        /**
        * Audio stretch factor (Speed).
        */
        float m_audioStretchFactor;

        /**
        * GStreamer sink name to use, or ALSA PCM device name.
        */
        TQString m_sinkName;

        /**
        * Timer for monitoring audio player.
        */
        TQTimer* m_timer;

        /**
        * Current Text job being processed.
        */
        uint m_currentJobNum;

        /**
        * Job Number, appId, and sequence number of the last text sentence queued.
        */
        uint m_lastJobNum;
        TQCString m_lastAppId;
        uint m_lastSeq;

        /**
        * Some parameters used by ALSA plugin.
        * Size of buffer interrupt period (in frames)
        * Number of periods in buffer.
        */
        uint m_periodSize;
        uint m_periods;

        /**
        * Debug level in players.
        */
        uint m_playerDebugLevel;
};

#endif // _SPEAKER_H_