You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
tdesdk/poxml/po2xml.cpp

262 lines
7.7 KiB

// #define POXML_DEBUG
#include "parser.h"
#include <stdlib.h>
#include <iostream>
#include <assert.h>
#include <tqregexp.h>
#include <fstream>
#include "GettextLexer.h"
#include "GettextParser.h"
#include "antlr/AST.h"
#include "antlr/CommonAST.h"
using namespace std;
// Substitutes the first occurrence of `orig` inside `xml` with `translation`
// and returns the resulting text.
//
// While `xml` begins with '<' and `orig` does not, everything up to and
// including the first '>' (plus any spaces directly following it) is moved
// into a prefix that is neither searched nor modified.  The prefix is put
// back in front of the result before returning.
//
// An empty `translation` leaves the text unchanged.  The process is
// terminated with exit status 1 when a leading '<' has no closing '>' or
// when `orig` does not occur in the remaining text.
TQString translate(TQString xml, TQString orig, TQString translation)
{
    TQString prefix;
    while (xml.at(0) == '<' && orig.at(0) != '<') {
        // a XML tag as prefix
        int index = xml.find('>');
        if (index == -1) {
            // Checked at run time instead of with assert(): when assertions
            // are compiled out (NDEBUG) index would become 0 below, nothing
            // would be consumed and this loop would never terminate.
            tqWarning("can't find closing '>' in\n%s", xml.latin1());
            exit(1);
        }
        index++;
        // Spaces right after the tag belong to the prefix as well.
        while (xml.at(index) == ' ')
            index++;
        prefix += xml.left(index);
        xml = xml.mid(index);
    }

    int index = xml.find(orig);
    if (index == -1) {
        tqWarning("can't find\n%s\nin\n%s", orig.latin1(), xml.latin1());
        exit(1);
    }
    // No translation available: keep the original wording.
    if (!translation.isEmpty())
        xml.replace(index, orig.length(), translation);
    return prefix + xml;
}
// Usage: <program> english-XML translated-PO
//
// Parses the XML document given as argv[1] into a message list, reads the
// gettext catalogue given as argv[2], replaces every message block of the
// XML text by its catalogue translation and writes the result to stdout as
// UTF-8.  Entries whose comment contains "fuzzy" are stored with an empty
// translation, so the original text is kept for them.
//
// Returns 0 on success and 1 when the catalogue parser throws or the XML
// file cannot be opened.  Calls exit(1) on wrong usage, when a translation
// of a tag-enclosed message does not start with a tag, and (through
// translate()) when a message cannot be located in its block.
int main( int argc, char **argv )
{
    if (argc != 3) {
        tqWarning("usage: %s english-XML translated-PO", argv[0]);
        ::exit(1);
    }

    MsgList english = parseXML(argv[1]);
    MsgList translated;

    try {
        ifstream s(argv[2]);
        GettextLexer lexer(s);
        GettextParser parser(lexer);
        translated = parser.file();
    } catch(exception& e) {
        cerr << "exception: " << e.what() << endl;
        return 1;
    }

    // Lookup table msgid -> msgstr.  Fuzzy entries are inserted with an
    // empty msgstr.
    TQMap<TQString, TQString> translations;
    for (MsgList::ConstIterator it = translated.begin();
         it != translated.end(); ++it)
    {
        TQString msgstr;
        TQString msgid = escapePO((*it).msgid);
        if ((*it).comment.find("fuzzy") < 0)
            msgstr = escapePO((*it).msgstr);

#ifdef POXML_DEBUG
        tqDebug("inserting translations '%s' -> '%s'", msgid.latin1(),msgstr.latin1());
#endif
        translations.insert(msgid, msgstr);
    }

    // Read the whole XML document as UTF-8 text.
    TQFile xml(argv[1]);
    if (!xml.open(IO_ReadOnly)) {
        // Without this check an unreadable file would silently yield an
        // empty document and out-of-range line lookups further down.
        tqWarning("can't open %s", argv[1]);
        return 1;
    }
    TQTextStream ds(&xml);
    ds.setEncoding(TQTextStream::UnicodeUTF8);
    TQString xml_text = ds.read();
    xml.close();
    TQString output;
    TQTextStream ts(&output, IO_WriteOnly);
    StructureParser::cleanupTags(xml_text);

    // line_offsets[n] is the index in xml_text of the first character of
    // (0-based) line n.
    TQValueList<int> line_offsets;
    line_offsets.append(0);
    int index = 0;
    while (true) {
        index = xml_text.find('\n', index) + 1;
        if (index <= 0)
            break;
        line_offsets.append(index);
    }

    // First pass: record the start position of every message.  When a
    // message begins at the same line/column as the previous one, the
    // previous message's `end` is set to this block's offset and this
    // message's `end` to its end position; otherwise offset and `end` are
    // reset to 0.
    int old_start_line = -1, old_start_col = -1;
    MsgList::Iterator old_it = english.end();
    for (MsgList::Iterator it = english.begin();
         it != english.end(); ++it)
    {
        BlockInfo bi = (*it).lines.first();
        // Skip entries without an end line before touching line_offsets.
        if (!bi.end_line)
            continue;
        int start_pos = line_offsets[bi.start_line - 1] + bi.start_col;
        int end_pos = line_offsets[bi.end_line - 1] + bi.end_col - 1;

        (*it).start = start_pos;
        if (old_start_line == bi.start_line &&
            old_start_col == bi.start_col)
        {
            (*old_it).end = bi.offset;
            (*it).end = end_pos;
        } else {
            (*it).lines.first().offset = 0;
            (*it).end = 0;
        }

        old_start_line = bi.start_line;
        old_start_col = bi.start_col;
        old_it = it;
    }

    // Second pass: copy the untouched text between blocks and emit each
    // block with its message replaced.  old_pos is the position in xml_text
    // up to which output has been produced.
    int old_pos = 0;

    for (MsgList::Iterator it = english.begin();
         it != english.end(); ++it)
    {
        BlockInfo bi = (*it).lines.first();
        // Skip entries without an end line before touching line_offsets.
        if (!bi.end_line)
            continue;
        int start_pos = line_offsets[bi.start_line - 1] + bi.start_col;
        int end_pos = line_offsets[bi.end_line - 1] + bi.end_col - 1;

        TQString xml = xml_text.mid(start_pos, end_pos - start_pos);

        // Replace every XML comment inside the block by a single space.
        while (true) {
            int comment_start = xml.find("<!--");
            if (comment_start == -1)
                break;
            // The terminator may share its dashes with the opener ("<!-->"),
            // hence the search starts two characters into the opener.
            int terminator = xml.find("-->", comment_start + 2);
            if (terminator == -1)
                break; // unterminated comment: leave it instead of scanning forever
            int end_index = terminator + 2; // position of the closing '>'
            xml.replace(comment_start, end_index + 1 - comment_start, " ");
        }
        StructureParser::descape(xml);

        // Falls back to the de-escaped msgid when no translation is known.
        TQString descaped = StructureParser::descapeLiterals((*it).msgid);
        if (translations.contains(descaped))
            descaped = translations[descaped];

#ifdef POXML_DEBUG
        // assert(!descaped.isEmpty());
#endif

        if ((*it).msgid.at(0) == '<' && StructureParser::isClosure((*it).msgid)) {
            // if the id starts with a tag, then we remembered the
            // correct line information and need to strip the target
            // now, so it fits
            int index = 0;
            while (index < (int)(*it).msgid.length() && (*it).msgid.at(index) != '>')
                index++;
            index++;
            while ((*it).msgid.at(index) == ' ')
                index++;
            TQString omsgid = (*it).msgid;
            (*it).msgid = (*it).msgid.mid(index);

            // Cut everything from the last '<' on.
            index = (*it).msgid.length() - 1;
            while (index >= 0 && (*it).msgid.at(index) != '<')
                index--;
            (*it).msgid = (*it).msgid.left(index);

            // Strip the translation the same way.
            if (!descaped.isEmpty()) {
                if (descaped.at(0) != '<') {
                    tqWarning("the translation of '%s' doesn't start with a tag.", omsgid.latin1());
                    exit(1);
                }
                index = 0;
                while (index < (int)descaped.length() && descaped.at(index) != '>')
                    index++;
                index++;
                while (descaped.at(index) == ' ')
                    index++;
                descaped = descaped.mid(index);

                index = descaped.length() - 1;
                while (index >= 0 && descaped.at(index) != '<')
                    index--;
                descaped = descaped.left(index);
            }
        }

#ifdef POXML_DEBUG
        tqDebug("english \"%s\" ORIG \"%s\" %d(%d-%d) %d(%d-%d) %d %d TRANS \"%s\" %d '%s'", xml.latin1(), (*it).msgid.latin1(),
               start_pos, bi.start_line, bi.start_col,
               end_pos, bi.end_line, bi.end_col,
               (*it).lines.first().offset,
               (*it).end,
               translations[(*it).msgid].latin1(), (*it).end,
               descaped.latin1()
            );
#endif

        if ((*it).end) {
            // This message covers only the [bi.offset, end) slice of the
            // block text.
            if (!(*it).lines.first().offset && end_pos != old_pos) {
                assert(start_pos >= old_pos);
                ts << xml_text.mid(old_pos, start_pos - old_pos);
            }
            assert((*it).end >= bi.offset);
            ts << translate(xml.mid(bi.offset, (*it).end - bi.offset),
                            (*it).msgid, descaped);
            old_pos = end_pos;
        } else {
            if (start_pos != old_pos) {
                if (start_pos < old_pos) {
                    tqDebug("so far: '%s'", output.latin1());
                }
                assert(start_pos > old_pos);
                ts << xml_text.mid(old_pos, start_pos - old_pos);
            }
            old_pos = end_pos;
            ts << translate(xml,
                            (*it).msgid, descaped);
        }
    }

    // Remainder of the document after the last block.
    ts << xml_text.mid(old_pos);

    output.replace(TQRegExp("<trans_comment\\s*>"), "");
    output.replace(TQRegExp("</trans_comment\\s*>"), "");

    StructureParser::removeEmptyTags(output);

    // Put a line break in front of every '>' that is not already followed
    // by a newline, unless the character before it is '/', '-', ']' or '?'.
    const TQRegExp tag_end(">[^\n]");
    index = 0;
    while (true) {
        index = output.find(tag_end, index);
        if ( index == -1 )
            break;
        if ( output.at( index - 1 ) == '/' || output.at( index - 1 ) == '-' ||
             output.at( index - 1 ) == ']' || output.at( index - 1 ) == '?' )
            index = index + 1;
        else {
            output.replace( index, 1, "\n>" );
            index = index + 2;
        }
    }
    output = StructureParser::descapeLiterals(output);

    cout << output.utf8().data();
    return 0;
}