You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
pytqt/pylupdate3/metatranslator.cpp

558 lines
14 KiB

/**********************************************************************
** Copyright (C) 2000 Trolltech AS. All rights reserved.
**
** metatranslator.cpp
**
** This file is part of TQt Linguist.
**
** See the file LICENSE included in the distribution for the usage
** and distribution terms.
**
** The file is provided AS IS with NO WARRANTY OF ANY KIND,
** INCLUDING THE WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR
** A PARTICULAR PURPOSE.
**
**********************************************************************/
#include <qapplication.h>
#include <qcstring.h>
#include <qfile.h>
#include <qmessagebox.h>
#include <qregexp.h>
#include <qtextcodec.h>
#include <qtextstream.h>
#include <qxml.h>
#include "metatranslator.h"
static bool encodingIsUtf8( const TQXmlAttributes& atts )
{
for ( int i = 0; i < atts.length(); i++ ) {
// utf8="true" is a pre-3.0 syntax
if ( atts.qName(i) == TQString("utf8") ) {
return ( atts.value(i) == TQString("true") );
} else if ( atts.qName(i) == TQString("encoding") ) {
return ( atts.value(i) == TQString("UTF-8") );
}
}
return FALSE;
}
/*
  XML handler that fills a MetaTranslator while a document is parsed.

  Character data is gathered in accum; when an element ends, the
  gathered text is stored in the matching field (context, source,
  comment, translation) or turned into a MetaTranslatorMessage that is
  inserted into the translator.
*/
class TsHandler : public TQXmlDefaultHandler
{
public:
    TsHandler( MetaTranslator *translator )
        : tor( translator ),
          type( MetaTranslatorMessage::Finished ),
          inMessage( FALSE ),
          ferrorCount( 0 ),
          contextIsUtf8( FALSE ),
          messageIsUtf8( FALSE )
    {
    }

    virtual bool startElement( const TQString& namespaceURI,
                               const TQString& localName,
                               const TQString& qName,
                               const TQXmlAttributes& atts );
    virtual bool endElement( const TQString& namespaceURI,
                             const TQString& localName,
                             const TQString& qName );
    virtual bool characters( const TQString& ch );
    virtual bool fatalError( const TQXmlParseException& exception );

private:
    MetaTranslator *tor;              // translator receiving the messages
    MetaTranslatorMessage::Type type; // type of the message being read
    bool inMessage;                   // TRUE between <message> and </message>

    TQString context;                 // text of the last <name>
    TQString source;                  // text of the last <source>
    TQString comment;                 // text of the last <comment> inside a message
    TQString translation;             // text of the last <translation>
    TQString accum;                   // character data gathered so far

    int ferrorCount;                  // number of fatal errors seen
    bool contextIsUtf8;               // encoding flag read from <context>
    bool messageIsUtf8;               // encoding flag read from <message>
};
bool TsHandler::startElement( const TQString& /* namespaceURI */,
const TQString& /* localName */,
const TQString& qName,
const TQXmlAttributes& atts )
{
if ( qName == TQString("byte") ) {
for ( int i = 0; i < atts.length(); i++ ) {
if ( atts.qName(i) == TQString("value") ) {
TQString value = atts.value( i );
int base = 10;
if ( value.startsWith("x") ) {
base = 16;
value = value.mid( 1 );
}
int n = value.toUInt( 0, base );
if ( n != 0 )
accum += TQChar( n );
}
}
} else {
if ( qName == TQString("context") ) {
context.truncate( 0 );
source.truncate( 0 );
comment.truncate( 0 );
translation.truncate( 0 );
contextIsUtf8 = encodingIsUtf8( atts );
} else if ( qName == TQString("message") ) {
inMessage = TRUE;
type = MetaTranslatorMessage::Finished;
source.truncate( 0 );
comment.truncate( 0 );
translation.truncate( 0 );
messageIsUtf8 = encodingIsUtf8( atts );
} else if ( qName == TQString("translation") ) {
for ( int i = 0; i < atts.length(); i++ ) {
if ( atts.qName(i) == TQString("type") ) {
if ( atts.value(i) == TQString("unfinished") )
type = MetaTranslatorMessage::Unfinished;
else if ( atts.value(i) == TQString("obsolete") )
type = MetaTranslatorMessage::Obsolete;
else
type = MetaTranslatorMessage::Finished;
}
}
}
accum.truncate( 0 );
}
return TRUE;
}
/*
  Handles the end of an element by consuming the text gathered in
  accum.

  <codec>/<defaultcodec> set the translator's codec; <name>, <source>
  and <translation> store the text; <comment> stores the text when
  inside a message and otherwise inserts a context-comment message
  (empty source text, Unfinished); </message> inserts the completed
  message. Always returns TRUE.
*/
bool TsHandler::endElement( const TQString& /* namespaceURI */,
                            const TQString& /* localName */,
                            const TQString& qName )
{
    if ( qName == TQString("codec") || qName == TQString("defaultcodec") ) {
        // "codec" is a pre-3.0 syntax
        tor->setCodec( accum );
        return TRUE;
    }

    if ( qName == TQString("name") ) {
        context = accum;
        return TRUE;
    }

    if ( qName == TQString("source") ) {
        source = accum;
        return TRUE;
    }

    if ( qName == TQString("comment") ) {
        if ( inMessage ) {
            comment = accum;
        } else if ( contextIsUtf8 ) {
            tor->insert( MetaTranslatorMessage(context.utf8(), "",
                                               accum.utf8(), TQString::null,
                                               TRUE,
                                               MetaTranslatorMessage::Unfinished) );
        } else {
            tor->insert( MetaTranslatorMessage(context.ascii(), "",
                                               accum.ascii(), TQString::null,
                                               FALSE,
                                               MetaTranslatorMessage::Unfinished) );
        }
        return TRUE;
    }

    if ( qName == TQString("translation") ) {
        translation = accum;
        return TRUE;
    }

    if ( qName == TQString("message") ) {
        if ( messageIsUtf8 ) {
            tor->insert( MetaTranslatorMessage(context.utf8(), source.utf8(),
                                               comment.utf8(), translation,
                                               TRUE, type) );
        } else {
            tor->insert( MetaTranslatorMessage(context.ascii(), source.ascii(),
                                               comment.ascii(), translation,
                                               FALSE, type) );
        }
        inMessage = FALSE;
    }

    return TRUE;
}
bool TsHandler::characters( const TQString& ch )
{
TQString t = ch;
t.replace( TQRegExp(TQChar('\r')), "" );
accum += t;
return TRUE;
}
bool TsHandler::fatalError( const TQXmlParseException& exception )
{
if ( ferrorCount++ == 0 ) {
TQString msg;
msg.sprintf( "Parse error at line %d, column %d (%s).",
exception.lineNumber(), exception.columnNumber(),
exception.message().latin1() );
if ( qApp == 0 )
qWarning( "XML error: %s", msg.latin1() );
else
TQMessageBox::information( qApp->mainWidget(),
TQObject::tr("TQt Linguist"), msg );
}
return FALSE;
}
/*
  Returns the escaped form of character code ch, with the code written
  in hexadecimal: a <byte value="x.."/> element for codes up to 0x20,
  and a &#x..; character reference for anything above.
*/
static TQString numericEntity( int ch )
{
    const char *pattern = "&#x%1;";
    if ( ch <= 0x20 )
        pattern = "<byte value=\"x%1\"/>";
    return TQString( pattern ).arg( ch, 0, 16 );
}
/*
  Returns str with the characters ", &, >, < and ' replaced by their
  named entities and every byte below 0x20 other than '\n' replaced by
  numericEntity(). All other bytes are appended unchanged.
*/
static TQString protect( const TQCString& str )
{
    TQString escaped;
    const int count = (int) str.length();

    for ( int pos = 0; pos < count; ++pos ) {
        const char c = str[pos];

        if ( c == '\"' )
            escaped += TQString( "&quot;" );
        else if ( c == '&' )
            escaped += TQString( "&amp;" );
        else if ( c == '>' )
            escaped += TQString( "&gt;" );
        else if ( c == '<' )
            escaped += TQString( "&lt;" );
        else if ( c == '\'' )
            escaped += TQString( "&apos;" );
        else if ( (uchar) c < 0x20 && c != '\n' )
            escaped += numericEntity( (uchar) c );
        else
            escaped += c;
    }
    return escaped;
}
/*
  Returns str escaped for output. For UTF-8 text this is protect(str).
  Otherwise every byte of the protected text that is 0x7f or above is
  additionally replaced by numericEntity().
*/
static TQString evilBytes( const TQCString& str, bool utf8 )
{
    if ( utf8 )
        return protect( str );

    const TQCString bytes = protect( str ).latin1();
    const int count = (int) bytes.length();

    TQString escaped;
    for ( int pos = 0; pos < count; ++pos ) {
        const uchar byte = (uchar) bytes[pos];
        if ( byte >= 0x7f )
            escaped += numericEntity( byte );
        else
            escaped += TQChar( bytes[pos] );
    }
    return escaped;
}
/*
  Constructs a message that is not flagged as UTF-8 and has type
  Unfinished.
*/
MetaTranslatorMessage::MetaTranslatorMessage()
    : utfeight( FALSE ),
      ty( Unfinished )
{
}
/*
  Constructs a message from its context, source text, comment and
  translation, with the given type.

  The UTF-8 flag is set only when utf8 is TRUE and sourceText or
  comment contains at least one byte of 0x80 or above; null pointers
  are skipped.
*/
MetaTranslatorMessage::MetaTranslatorMessage( const char *context,
                                              const char *sourceText,
                                              const char *comment,
                                              const TQString& translation,
                                              bool utf8, Type type )
    : TQTranslatorMessage( context, sourceText, comment, translation ),
      utfeight( FALSE ), ty( type )
{
    /*
      Don't use UTF-8 if it makes no difference. UTF-8 should be
      reserved for the real problematic case: non-ASCII (possibly
      non-Latin-1) characters in .ui files.
    */
    if ( !utf8 )
        return;

    // source text first, then the comment; stop at the first high byte
    const char *texts[2] = { sourceText, comment };
    for ( int which = 0; which < 2 && !utfeight; ++which ) {
        const char *p = texts[which];
        if ( p == 0 )
            continue;
        for ( ; *p != '\0'; ++p ) {
            if ( (uchar) *p >= 0x80 ) {
                utfeight = TRUE;
                break;
            }
        }
    }
}
/*
  Constructs a copy of m, including its UTF-8 flag and type.
*/
MetaTranslatorMessage::MetaTranslatorMessage( const MetaTranslatorMessage& m )
    : TQTranslatorMessage( m ),
      utfeight( m.utfeight ),
      ty( m.ty )
{
}
/*
  Assigns m to this message: the base-class part first, then the UTF-8
  flag and the type. Returns a reference to this message.
*/
MetaTranslatorMessage& MetaTranslatorMessage::operator=(
        const MetaTranslatorMessage& m )
{
    TQTranslatorMessage::operator=( m );
    ty = m.ty;
    utfeight = m.utfeight;
    return *this;
}
/*
  Returns TRUE if this message and m have the same context, source
  text and comment (compared with qstrcmp). The translation, UTF-8
  flag and type are not compared.
*/
bool MetaTranslatorMessage::operator==( const MetaTranslatorMessage& m ) const
{
    if ( qstrcmp(context(), m.context()) != 0 )
        return FALSE;
    if ( qstrcmp(sourceText(), m.sourceText()) != 0 )
        return FALSE;
    return qstrcmp( comment(), m.comment() ) == 0;
}
/*
  Orders messages by context, then by source text, then by comment,
  each compared with qstrcmp. Returns TRUE if this message sorts
  before m.
*/
bool MetaTranslatorMessage::operator<( const MetaTranslatorMessage& m ) const
{
    int order = qstrcmp( context(), m.context() );
    if ( order != 0 )
        return order < 0;

    order = qstrcmp( sourceText(), m.sourceText() );
    if ( order != 0 )
        return order < 0;

    return qstrcmp( comment(), m.comment() ) < 0;
}
/*
  Constructs an empty translator whose codec name is "ISO-8859-1" and
  whose codec pointer is 0.
*/
MetaTranslator::MetaTranslator()
    : codecName( "ISO-8859-1" ),
      codec( 0 )
{
}
/*
  Constructs a copy of tor: its messages, codec name and codec
  pointer.
*/
MetaTranslator::MetaTranslator( const MetaTranslator& tor )
    : mm( tor.mm ),
      codecName( tor.codecName ),
      codec( tor.codec )
{
}
/*
  Assigns tor's messages, codec name and codec pointer to this
  translator and returns a reference to it.
*/
MetaTranslator& MetaTranslator::operator=( const MetaTranslator& tor )
{
    mm = tor.mm;
    codec = tor.codec;
    codecName = tor.codecName;
    return *this;
}
bool MetaTranslator::load( const TQString& filename )
{
mm.clear();
TQFile f( filename );
if ( !f.open(IO_ReadOnly) )
return FALSE;
TQTextStream t( &f );
TQXmlInputSource in( t );
TQXmlSimpleReader reader;
// don't click on these!
reader.setFeature( "http://xml.org/sax/features/namespaces", FALSE );
reader.setFeature( "http://xml.org/sax/features/namespace-prefixes", TRUE );
reader.setFeature( "http://trolltech.com/xml/features/report-whitespace"
"-only-CharData", FALSE );
TQXmlDefaultHandler *hand = new TsHandler( this );
reader.setContentHandler( hand );
reader.setErrorHandler( hand );
bool ok = reader.parse( in );
reader.setContentHandler( 0 );
reader.setErrorHandler( 0 );
delete hand;
f.close();
if ( !ok )
mm.clear();
return ok;
}
bool MetaTranslator::save( const TQString& filename ) const
{
TQFile f( filename );
if ( !f.open(IO_WriteOnly) )
return FALSE;
TQTextStream t( &f );
t.setCodec( TQTextCodec::codecForName("ISO-8859-1") );
t << "<!DOCTYPE TS><TS>\n";
if ( codecName != "ISO-8859-1" )
t << "<defaultcodec>" << codecName << "</defaultcodec>\n";
TMM::ConstIterator m = mm.begin();
while ( m != mm.end() ) {
TMMInv inv;
TMMInv::Iterator i;
bool contextIsUtf8 = m.key().utf8();
TQCString context = m.key().context();
TQCString comment = "";
do {
if ( TQCString(m.key().sourceText()).isEmpty() ) {
if ( m.key().type() != MetaTranslatorMessage::Obsolete ) {
contextIsUtf8 = m.key().utf8();
comment = TQCString( m.key().comment() );
}
} else {
inv.insert( *m, m.key() );
}
} while ( ++m != mm.end() && TQCString(m.key().context()) == context );
t << "<context";
if ( contextIsUtf8 )
t << " encoding=\"UTF-8\"";
t << ">\n";
t << " <name>" << evilBytes( context, contextIsUtf8 )
<< "</name>\n";
if ( !comment.isEmpty() )
t << " <comment>" << evilBytes( comment, contextIsUtf8 )
<< "</comment>\n";
for ( i = inv.begin(); i != inv.end(); ++i ) {
t << " <message";
if ( (*i).utf8() )
t << " encoding=\"UTF-8\"";
t << ">\n"
<< " <source>" << evilBytes( (*i).sourceText(),
(*i).utf8() )
<< "</source>\n";
if ( !TQCString((*i).comment()).isEmpty() )
t << " <comment>" << evilBytes( (*i).comment(),
(*i).utf8() )
<< "</comment>\n";
t << " <translation";
if ( (*i).type() == MetaTranslatorMessage::Unfinished )
t << " type=\"unfinished\"";
else if ( (*i).type() == MetaTranslatorMessage::Obsolete )
t << " type=\"obsolete\"";
t << ">" << protect( (*i).translation().utf8() )
<< "</translation>\n";
t << " </message>\n";
}
t << "</context>\n";
}
t << "</TS>\n";
f.close();
return TRUE;
}
bool MetaTranslator::release( const TQString& filename, bool verbose ) const
{
TQTranslator tor( 0 );
int finished = 0;
int unfinished = 0;
int untranslated = 0;
TMM::ConstIterator m;
for ( m = mm.begin(); m != mm.end(); ++m ) {
if ( m.key().type() != MetaTranslatorMessage::Obsolete ) {
if ( m.key().translation().isEmpty() ) {
untranslated++;
} else {
if ( m.key().type() == MetaTranslatorMessage::Unfinished )
unfinished++;
else
finished++;
tor.insert( m.key() );
}
}
}
bool saved = tor.save( filename, TQTranslator::Stripped );
if ( saved && verbose )
qWarning( " %d finished, %d unfinished and %d untranslated messages",
finished, unfinished, untranslated );
return saved;
}
bool MetaTranslator::contains( const char *context, const char *sourceText,
const char *comment ) const
{
return mm.find( MetaTranslatorMessage(context, sourceText, comment) ) !=
mm.end();
}
void MetaTranslator::insert( const MetaTranslatorMessage& m )
{
int pos = mm.count();
TMM::Iterator n = mm.find( m );
if ( n != mm.end() )
pos = *n;
mm.replace( m, pos );
}
void MetaTranslator::stripObsoleteMessages()
{
TMM newmm;
TMM::Iterator m = mm.begin();
while ( m != mm.end() ) {
if ( m.key().type() != MetaTranslatorMessage::Obsolete )
newmm.insert( m.key(), *m );
++m;
}
mm = newmm;
}
/*
  Removes context comments (messages with empty source text) that are
  not followed by another message of the same context. All other
  messages are kept, with the positions stored with them.
*/
void MetaTranslator::stripEmptyContexts()
{
    TMM newmm;

    TMM::Iterator m = mm.begin();
    while ( m != mm.end() ) {
        if ( TQCString(m.key().sourceText()).isEmpty() ) {
            TMM::Iterator n = m;
            ++n;
            // the context comment is followed by other messages
            //
            // n walks mm, so it has to be checked against mm.end(): an
            // end iterator of a different map never compares equal to
            // it, which would let n.key() be read on mm's end iterator
            // when the context comment is the last entry.
            if ( n != mm.end() &&
                 qstrcmp(m.key().context(), n.key().context()) == 0 )
                newmm.insert( m.key(), *m );
        } else {
            newmm.insert( m.key(), *m );
        }
        ++m;
    }
    mm = newmm;
}
/*
  Records name as the codec name and looks up the matching text codec.
  The codec pointer is left at 0 when no codec is found or when the
  codec found has the MIB enum value held in latin1.
*/
void MetaTranslator::setCodec( const char *name )
{
    const int latin1 = 4;

    codecName = name;
    codec = TQTextCodec::codecForName( name );
    if ( codec != 0 && codec->mibEnum() == latin1 )
        codec = 0;
}
/*
  Converts str to a TQString: from UTF-8 when utf8 is TRUE, otherwise
  through the translator's codec, or through the plain TQString
  constructor when no codec is set.
*/
TQString MetaTranslator::toUnicode( const char *str, bool utf8 ) const
{
    if ( utf8 )
        return TQString::fromUtf8( str );
    if ( codec != 0 )
        return codec->toUnicode( str );
    return TQString( str );
}
/*
  Returns all messages, ordered by the position stored with each of
  them in mm.

  The positions are used as sort keys, not as array indices:
  stripObsoleteMessages() and stripEmptyContexts() copy positions
  unchanged while dropping entries, so a position can be greater than
  or equal to mm.count(). Indexing a count-sized array with such a
  position would write out of bounds and leave unset slots behind.
  Messages that share a position are all kept, in mm's key order.
*/
TQValueList<MetaTranslatorMessage> MetaTranslator::messages() const
{
    typedef TQMap<int, TQValueList<MetaTranslatorMessage> > ByPosition;

    // group the messages under their stored position
    ByPosition byPosition;
    TMM::ConstIterator m;
    for ( m = mm.begin(); m != mm.end(); ++m )
        byPosition[*m].append( m.key() );

    // the map iterates in ascending position order
    TQValueList<MetaTranslatorMessage> val;
    ByPosition::Iterator p;
    for ( p = byPosition.begin(); p != byPosition.end(); ++p )
        val += *p;
    return val;
}
/*
  Returns the messages whose type is Finished, in the iteration order
  of mm.
*/
TQValueList<MetaTranslatorMessage> MetaTranslator::translatedMessages() const
{
    TQValueList<MetaTranslatorMessage> finishedOnes;

    TMM::ConstIterator it = mm.begin();
    while ( it != mm.end() ) {
        const MetaTranslatorMessage& msg = it.key();
        if ( msg.type() == MetaTranslatorMessage::Finished )
            finishedOnes.append( msg );
        ++it;
    }
    return finishedOnes;
}