You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
296 lines
12 KiB
296 lines
12 KiB
/***************************************************** vim:set ts=4 sw=4 sts=4:
|
|
SSMLConvert class
|
|
|
|
This class is in charge of converting SSML text into a format that can
|
|
be handled by individual synths.
|
|
-------------------
|
|
Copyright:
|
|
(C) 2004 by Paul Giannaros <ceruleanblaze@gmail.com>
|
|
(C) 2004 by Gary Cramblitt <garycramblitt@comcast.net>
|
|
-------------------
|
|
Original author: Paul Giannaros <ceruleanblaze@gmail.com>
|
|
******************************************************************************/
|
|
|
|
/***************************************************************************
|
|
* *
|
|
* This program is free software; you can redistribute it and/or modify *
|
|
* it under the terms of the GNU General Public License as published by *
|
|
* the Free Software Foundation; version 2 of the License. *
|
|
* *
|
|
***************************************************************************/
|
|
|
|
// TQt includes.
|
|
#include <tqstring.h>
|
|
#include <tqstringlist.h>
|
|
#include <tqdom.h>
|
|
#include <tqfile.h>
|
|
#include <tqtextstream.h>
|
|
|
|
// KDE includes.
|
|
#include <tdeversion.h>
|
|
#include <kstandarddirs.h>
|
|
#include <kprocess.h>
|
|
#include <tdetempfile.h>
|
|
#include <kdebug.h>
|
|
|
|
// SSMLConvert includes.
|
|
#include "ssmlconvert.h"
|
|
#include "ssmlconvert.moc"
|
|
|
|
/// Constructor.
|
|
SSMLConvert::SSMLConvert() {
|
|
m_talkers = TQStringList();
|
|
m_xsltProc = 0;
|
|
m_state = tsIdle;
|
|
}
|
|
|
|
/// Constructor. Set the talkers to be used as reference for entered text.
|
|
SSMLConvert::SSMLConvert(const TQStringList &talkers) {
|
|
m_talkers = talkers;
|
|
m_xsltProc = 0;
|
|
m_state = tsIdle;
|
|
}
|
|
|
|
/// Destructor.
|
|
SSMLConvert::~SSMLConvert() {
|
|
delete m_xsltProc;
|
|
if (!m_inFilename.isEmpty()) TQFile::remove(m_inFilename);
|
|
if (!m_outFilename.isEmpty()) TQFile::remove(m_outFilename);
|
|
}
|
|
|
|
/// Set the talkers to be used as reference for entered text.
|
|
void SSMLConvert::setTalkers(const TQStringList &talkers) {
|
|
m_talkers = talkers;
|
|
}
|
|
|
|
/**
 * Extract a short synthesizer name from a talker code.
 * @param talkercode the talker code containing a synthesizer="..." attribute.
 * @returns "flite" if the quoted synthesizer value contains "flite";
 *          otherwise the lower-cased text of that value up to its first space.
 */
TQString SSMLConvert::extractTalker(const TQString &talkercode)
{
    // Text following the "synthesizer=" key, then the quoted value inside it.
    const TQString afterKey = talkercode.section("synthesizer=", 1, 1);
    const TQString synthName = afterKey.section('"', 1, 1);

    if (!synthName.contains("flite")) {
        const int firstSpace = synthName.find(" ");
        return synthName.left(firstSpace).lower();
    }
    return "flite";
}
|
|
|
|
/**
|
|
* Return the most appropriate talker for the text to synth talker code.
|
|
* @param text the text that will be parsed.
|
|
* @returns the appropriate talker for the job as a talker code.
|
|
*
|
|
* The appropriate talker is the one that has the most features that are required in some
|
|
* SSML markup. In the future i'm hoping to make the importance of individual features
|
|
* configurable, but better to walk before you can run.
|
|
* Currently, the searching method in place is like a filter: Those that meet the criteria we're
|
|
* searchin for stay while others are sifted out. This should leave us with the right talker to use.
|
|
* It's not a very good method, but should be appropriate in most cases and should do just fine for now.
|
|
*
|
|
* As it stands, here is the list of things that are looked for, in order of most importance:
|
|
* - Language
|
|
* Obviously the most important. If a language is specified, look for the talkers that support it.
|
|
* Default to en (or some form of en - en_US, en_GB, etc). Only one language at a time is allowed
|
|
* at the moment, and must be specified in the root speak element (<speak xml:lang="en-US">)
|
|
* - Gender
|
|
* If a gender is specified, look for talkers that comply. There is no default so if no gender is
|
|
* specified, no talkers will be removed. The only gender that will be searched for is the one
|
|
* specified in the root speak element. This should change in the future.
|
|
* - Prosody
|
|
* Check if prosody modification is allowed by the talker. Currently this is hardcoded (it
|
|
* is stated which talkers do and do not in a variable somewhere).
|
|
*
|
|
* Bear in mind that the XSL stylesheet that will be applied to the SSML is the same regardless
|
|
* of the how the talker is chosen, meaning that you don't lose some features of the talker if this
|
|
* search doesn't encompass them.
|
|
*
|
|
* TQDom is the item of choice for the matching. Just walk the tree..
|
|
*/
|
|
TQString SSMLConvert::appropriateTalker(const TQString &text) const {
|
|
TQDomDocument ssml;
|
|
ssml.setContent(text, false); // No namespace processing.
|
|
/// Matches are stored here. Obviously to begin with every talker matches.
|
|
TQStringList matches = m_talkers;
|
|
|
|
/// Check that this is (well formed) SSML and all our searching will not be in vain.
|
|
TQDomElement root = ssml.documentElement();
|
|
if(root.tagName() != "speak") {
|
|
// Not SSML.
|
|
return TQString();
|
|
}
|
|
|
|
/**
|
|
* For each rule that we are looking through, iterate over all currently
|
|
* matching talkers and remove all the talkers that don't match.
|
|
*
|
|
* Storage for talker code components.
|
|
*/
|
|
TQString talklang, talkvoice, talkgender, talkvolume, talkrate, talkname;
|
|
|
|
kdDebug() << "SSMLConvert::appropriateTalker: BEFORE LANGUAGE SEARCH: " << matches.join(" ") << endl;;
|
|
/**
|
|
* Language searching
|
|
*/
|
|
if(root.hasAttribute("xml:lang")) {
|
|
TQString lang = root.attribute("xml:lang");
|
|
kdDebug() << "SSMLConvert::appropriateTalker: xml:lang found (" << lang << ")" << endl;
|
|
/// If it is set to en*, then match all english speakers. They all sound the same anyways.
|
|
if(lang.contains("en-")) {
|
|
kdDebug() << "SSMLConvert::appropriateTalker: English" << endl;
|
|
lang = "en";
|
|
}
|
|
/// Find all hits and place them in matches. We don't search for the closing " because if
|
|
/// the talker emits lang="en-UK" or something we'll be ignoring it, which we don't what.
|
|
matches = matches.grep("lang=\"" + lang);
|
|
}
|
|
else {
|
|
kdDebug() << "SSMLConvert::appropriateTalker: no xml:lang found. Defaulting to en.." << endl;
|
|
matches = matches.grep("lang=\"en");
|
|
}
|
|
|
|
kdDebug() << "SSMLConvert::appropriateTalker: AFTER LANGUAGE SEARCH: " << matches.join(" ") << endl;;
|
|
|
|
/**
|
|
* Gender searching
|
|
* If, for example, male is specified and only female is found,
|
|
* ignore the choice and just use female.
|
|
*/
|
|
if(root.hasAttribute("gender")) {
|
|
TQString gender = root.attribute("gender");
|
|
kdDebug() << "SSMLConvert::appropriateTalker: gender found (" << gender << ")" << endl;
|
|
/// If the gender found is not 'male' or 'female' then ignore it.
|
|
if(!(gender == "male" || gender == "female")) {
|
|
/// Make sure that we don't strip away all the talkers because of no matches.
|
|
if(matches.grep("gender=\"" + gender).count() >= 1)
|
|
matches = matches.grep("gender=\"" + gender);
|
|
}
|
|
}
|
|
else {
|
|
kdDebug() << "SSMLConvert::appropriateTalker: no gender found." << endl;
|
|
}
|
|
|
|
/**
|
|
* Prosody
|
|
* Search for talkers that allow modification of the synth output - louder, higher,
|
|
* slower, etc. There should be a direct way to query each synth to find out if this
|
|
* is supported (some function in PlugInConf), but for now, hardcode all the way :(
|
|
*/
|
|
/// Known to support (feel free to add to the list and if search):
|
|
/// Festival Int (not flite), Hadifix
|
|
if(matches.grep("synthesizer=\"Festival Interactive").count() >= 1 ||
|
|
matches.grep("synthesizer=\"Hadifix").count() >= 1) {
|
|
|
|
kdDebug() << "SSMLConvert::appropriateTalker: Prosody allowed" << endl;
|
|
TQStringList tmpmatches = matches.grep("synthesizer=\"Festival Interactive");
|
|
matches = matches.grep("synthesizer=\"Hadifix");
|
|
matches = tmpmatches + matches;
|
|
}
|
|
else
|
|
kdDebug() << "SSMLConvert::appropriateTalker: No prosody-supporting talkers found" << endl;
|
|
|
|
/// Return the first match that complies. Maybe a discrete way to
|
|
/// choose between all the matches could be offered in the future. Some form of preference.
|
|
return matches[0];
|
|
}
|
|
|
|
/**
|
|
* Applies the spreadsheet for a talker to the SSML and returns the talker-native output.
|
|
* @param text The markup to apply the spreadsheet to.
|
|
* @param xsltFilename The name of the stylesheet file that will be applied (i.e freetts, flite).
|
|
* @returns False if an error occurs.
|
|
*
|
|
* This converts a piece of SSML into a format the given talker can understand. It applies
|
|
* an XSLT spreadsheet to the SSML and returns the output.
|
|
*
|
|
* Emits transformFinished signal when completed. Caller then calls getOutput to retrieve
|
|
* the transformed text.
|
|
*/
|
|
|
|
bool SSMLConvert::transform(const TQString &text, const TQString &xsltFilename) {
|
|
m_xsltFilename = xsltFilename;
|
|
/// Write @param text to a temporary file.
|
|
KTempFile inFile(locateLocal("tmp", "kttsd-"), ".ssml");
|
|
m_inFilename = inFile.file()->name();
|
|
TQTextStream* wstream = inFile.textStream();
|
|
if (wstream == 0) {
|
|
/// wtf...
|
|
kdDebug() << "SSMLConvert::transform: Can't write to " << m_inFilename << endl;;
|
|
return false;
|
|
}
|
|
// TODO: Is encoding an issue here?
|
|
// TODO: It would be nice if we detected whether the XML is properly formed
|
|
// with the required xml processing instruction and encoding attribute. If
|
|
// not wrap it in such. But maybe this should be handled by SpeechData::setText()?
|
|
*wstream << text;
|
|
inFile.close();
|
|
#if TDE_VERSION >= TDE_MAKE_VERSION (3,3,0)
|
|
inFile.sync();
|
|
#endif
|
|
|
|
// Get a temporary output file name.
|
|
KTempFile outFile(locateLocal("tmp", "kttsd-"), ".output");
|
|
m_outFilename = outFile.file()->name();
|
|
outFile.close();
|
|
// outFile.unlink(); // only activate this if necessary.
|
|
|
|
/// Spawn an xsltproc process to apply our stylesheet to our SSML file.
|
|
m_xsltProc = new TDEProcess;
|
|
*m_xsltProc << "xsltproc";
|
|
*m_xsltProc << "-o" << m_outFilename << "--novalid"
|
|
<< m_xsltFilename << m_inFilename;
|
|
// Warning: This won't compile under KDE 3.2. See FreeTTS::argsToStringList().
|
|
// kdDebug() << "SSMLConvert::transform: executing command: " <<
|
|
// m_xsltProc->args() << endl;
|
|
|
|
connect(m_xsltProc, TQT_SIGNAL(processExited(TDEProcess*)),
|
|
this, TQT_SLOT(slotProcessExited(TDEProcess*)));
|
|
if (!m_xsltProc->start(TDEProcess::NotifyOnExit, TDEProcess::NoCommunication))
|
|
{
|
|
kdDebug() << "SSMLConvert::transform: Error starting xsltproc" << endl;
|
|
return false;
|
|
}
|
|
m_state = tsTransforming;
|
|
return true;
|
|
}
|
|
|
|
/// Slot invoked when the xsltproc process exits. Schedules the process object
/// for deletion, marks the transform as finished and emits transformFinished.
void SSMLConvert::slotProcessExited(TDEProcess* /*proc*/)
{
    TDEProcess* const finishedProc = m_xsltProc;
    m_xsltProc = 0;
    m_state = tsFinished;

    finishedProc->deleteLater();
    emit transformFinished();
}
|
|
|
|
/**
|
|
* Returns current processing state.
|
|
*/
|
|
int SSMLConvert::getState() { return m_state; }
|
|
|
|
/**
|
|
* Returns the output from call to transform.
|
|
*/
|
|
TQString SSMLConvert::getOutput()
|
|
{
|
|
/// Read back the data that was written to /tmp/fileName.output.
|
|
TQFile readfile(m_outFilename);
|
|
if(!readfile.open(IO_ReadOnly)) {
|
|
/// uhh yeah... Issues writing to the SSML file.
|
|
kdDebug() << "SSMLConvert::slotProcessExited: Could not read file " << m_outFilename << endl;
|
|
return TQString();
|
|
}
|
|
TQTextStream rstream(&readfile);
|
|
TQString convertedData = rstream.read();
|
|
readfile.close();
|
|
|
|
// kdDebug() << "SSMLConvert::slotProcessExited: Read SSML file at " + m_inFilename + " and created " + m_outFilename + " based on the stylesheet at " << m_xsltFilename << endl;
|
|
|
|
// Clean up.
|
|
TQFile::remove(m_inFilename);
|
|
m_inFilename = TQString();
|
|
TQFile::remove(m_outFilename);
|
|
m_outFilename = TQString();
|
|
|
|
// Ready for another transform.
|
|
m_state = tsIdle;
|
|
|
|
return convertedData;
|
|
}
|
|
|