|
|
|
// #define POXML_DEBUG
|
|
|
|
|
|
|
|
#include "parser.h"
|
|
|
|
#include <stdlib.h>
|
|
|
|
#include <iostream>
|
|
|
|
#include <assert.h>
|
|
|
|
#include <tqregexp.h>
|
|
|
|
|
|
|
|
#include <fstream>
|
|
|
|
#include "GettextLexer.h"
|
|
|
|
#include "GettextParser.h"
|
|
|
|
#include "antlr/AST.h"
|
|
|
|
#include "antlr/CommonAST.h"
|
|
|
|
|
|
|
|
using namespace std;
|
|
|
|
|
|
|
|
/**
 * Substitutes the first occurrence of @p orig inside @p xml with
 * @p translation and returns the resulting fragment.
 *
 * As long as @p xml begins with '<' while @p orig does not, the leading tag
 * (up to and including its '>' plus any spaces that follow) is moved verbatim
 * into a prefix, so the search for @p orig starts behind those tags. The
 * prefix is put back in front of the result.
 *
 * @param xml          fragment of the document that is to be translated
 * @param orig         text that must occur in @p xml once the leading tags
 *                     have been skipped
 * @param translation  replacement for @p orig; when empty, the fragment is
 *                     returned without any substitution
 * @return the skipped prefix followed by the (possibly substituted) rest
 *
 * The process is terminated with exit status 1 when a leading tag is never
 * closed or when @p orig cannot be found in @p xml.
 */
TQString translate(TQString xml, TQString orig, TQString translation)
{
    TQString prefix;

    while (xml.at(0) == '<' && orig.at(0) != '<') {
        // a XML tag as prefix
        int index = xml.find('>');
        if (index == -1) {
            // This used to be an assert() only. With NDEBUG the check would
            // disappear, index would become 0 below, nothing would be
            // consumed from xml and this loop would never terminate.
            tqWarning("can't find the end of the leading tag in\n%s", xml.latin1());
            exit(1);
        }
        index++;

        // swallow the spaces directly behind the tag as well
        while (xml.at(index) == ' ')
            index++;

        prefix += xml.left(index);
        xml = xml.mid(index, xml.length());
    }

    const int index = xml.find(orig);
    if (index == -1) {
        tqWarning("can't find\n%s\nin\n%s", orig.latin1(), xml.latin1());
        exit(1);
    }

    // an empty translation means "keep the original text"
    if (!translation.isEmpty())
        xml.replace(index, orig.length(), translation);

    return prefix + xml;
}
|
|
|
|
|
|
|
|
int main( int argc, char **argv )
|
|
|
|
{
|
|
|
|
if (argc != 3) {
|
|
|
|
tqWarning("usage: %s english-XML translated-PO", argv[0]);
|
|
|
|
::exit(1);
|
|
|
|
}
|
|
|
|
|
|
|
|
MsgList english = parseXML(argv[1]);
|
|
|
|
MsgList translated;
|
|
|
|
|
|
|
|
try {
|
|
|
|
ifstream s(argv[2]);
|
|
|
|
GettextLexer lexer(s);
|
|
|
|
GettextParser parser(lexer);
|
|
|
|
translated = parser.file();
|
|
|
|
|
|
|
|
} catch(exception& e) {
|
|
|
|
cerr << "exception: " << e.what() << endl;
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
TQMap<TQString, TQString> translations;
|
|
|
|
for (MsgList::ConstIterator it = translated.begin();
|
|
|
|
it != translated.end(); ++it)
|
|
|
|
{
|
|
|
|
TQString msgstr;
|
|
|
|
TQString msgid = escapePO((*it).msgid);
|
|
|
|
if ((*it).comment.find("fuzzy") < 0)
|
|
|
|
msgstr = escapePO((*it).msgstr);
|
|
|
|
|
|
|
|
#ifdef POXML_DEBUG
|
|
|
|
tqDebug("inserting translations '%s' -> '%s'", msgid.latin1(),msgstr.latin1());
|
|
|
|
#endif
|
|
|
|
translations.insert(msgid, msgstr);
|
|
|
|
}
|
|
|
|
|
|
|
|
TQFile xml(argv[1]);
|
|
|
|
xml.open(IO_ReadOnly);
|
|
|
|
TQTextStream ds(&xml);
|
|
|
|
ds.setEncoding(TQTextStream::UnicodeUTF8);
|
|
|
|
TQString xml_text = ds.read();
|
|
|
|
xml.close();
|
|
|
|
TQString output;
|
|
|
|
TQTextStream ts(&output, IO_WriteOnly);
|
|
|
|
StructureParser::cleanupTags(xml_text);
|
|
|
|
|
|
|
|
TQValueList<int> line_offsets;
|
|
|
|
line_offsets.append(0);
|
|
|
|
int index = 0;
|
|
|
|
while (true) {
|
|
|
|
index = xml_text.find('\n', index) + 1;
|
|
|
|
if (index <= 0)
|
|
|
|
break;
|
|
|
|
line_offsets.append(index);
|
|
|
|
}
|
|
|
|
|
|
|
|
int old_start_line = -1, old_start_col = -1;
|
|
|
|
TQString old_text;
|
|
|
|
MsgList::Iterator old_it = english.end();
|
|
|
|
|
|
|
|
for (MsgList::Iterator it = english.begin();
|
|
|
|
it != english.end(); ++it)
|
|
|
|
{
|
|
|
|
BlockInfo bi = (*it).lines.first();
|
|
|
|
int start_pos = line_offsets[bi.start_line - 1] + bi.start_col;
|
|
|
|
if (!bi.end_line)
|
|
|
|
continue;
|
|
|
|
int end_pos = line_offsets[bi.end_line - 1] + bi.end_col - 1;
|
|
|
|
|
|
|
|
(*it).start = start_pos;
|
|
|
|
if (old_start_line == bi.start_line &&
|
|
|
|
old_start_col == bi.start_col)
|
|
|
|
{
|
|
|
|
(*old_it).end = bi.offset;
|
|
|
|
(*it).end = end_pos;
|
|
|
|
} else {
|
|
|
|
(*it).lines.first().offset = 0;
|
|
|
|
(*it).end = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
old_start_line = bi.start_line;
|
|
|
|
old_start_col = bi.start_col;
|
|
|
|
old_it = it;
|
|
|
|
}
|
|
|
|
|
|
|
|
int old_pos = 0;
|
|
|
|
|
|
|
|
for (MsgList::Iterator it = english.begin();
|
|
|
|
it != english.end(); ++it)
|
|
|
|
{
|
|
|
|
BlockInfo bi = (*it).lines.first();
|
|
|
|
int start_pos = line_offsets[bi.start_line - 1] + bi.start_col;
|
|
|
|
if (!bi.end_line)
|
|
|
|
continue;
|
|
|
|
int end_pos = line_offsets[bi.end_line - 1] + bi.end_col - 1;
|
|
|
|
|
|
|
|
TQString xml = xml_text.mid(start_pos, end_pos - start_pos);
|
|
|
|
int index = 0;
|
|
|
|
while (true) {
|
|
|
|
index = xml.find("<!--");
|
|
|
|
if (index == -1)
|
|
|
|
break;
|
|
|
|
int end_index = index + 4;
|
|
|
|
while (xml.at(end_index) != '>' ||
|
|
|
|
xml.at(end_index-1) != '-' ||
|
|
|
|
xml.at(end_index-2) != '-')
|
|
|
|
{
|
|
|
|
end_index++;
|
|
|
|
}
|
|
|
|
xml.replace(index, end_index + 1 - index, " ");
|
|
|
|
index = end_index;
|
|
|
|
}
|
|
|
|
StructureParser::descape(xml);
|
|
|
|
|
|
|
|
TQString descaped = StructureParser::descapeLiterals((*it).msgid);
|
|
|
|
if (translations.contains(descaped))
|
|
|
|
descaped = translations[descaped];
|
|
|
|
|
|
|
|
#ifdef POXML_DEBUG
|
|
|
|
// assert(!descaped.isEmpty());
|
|
|
|
#endif
|
|
|
|
|
|
|
|
if ((*it).msgid.at(0) == '<' && StructureParser::isClosure((*it).msgid)) {
|
|
|
|
// if the id starts with a tag, then we remembered the
|
|
|
|
// correct line information and need to strip the target
|
|
|
|
// now, so it fits
|
|
|
|
int index = 0;
|
|
|
|
while ((*it).msgid.at(index) != '>')
|
|
|
|
index++;
|
|
|
|
index++;
|
|
|
|
while ((*it).msgid.at(index) == ' ')
|
|
|
|
index++;
|
|
|
|
TQString omsgid = (*it).msgid;
|
|
|
|
(*it).msgid = (*it).msgid.mid(index);
|
|
|
|
|
|
|
|
index = (*it).msgid.length() - 1;
|
|
|
|
while ((*it).msgid.at(index) != '<')
|
|
|
|
index--;
|
|
|
|
|
|
|
|
(*it).msgid = (*it).msgid.left(index);
|
|
|
|
|
|
|
|
if (!descaped.isEmpty()) {
|
|
|
|
if (descaped.at(0) != '<') {
|
|
|
|
tqWarning("the translation of '%s' doesn't start with a tag.", omsgid.latin1());
|
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
index = 0;
|
|
|
|
while (index <= (int)descaped.length() && descaped.at(index) != '>')
|
|
|
|
index++;
|
|
|
|
index++;
|
|
|
|
while (descaped.at(index) == ' ')
|
|
|
|
index++;
|
|
|
|
descaped = descaped.mid(index);
|
|
|
|
|
|
|
|
index = descaped.length() - 1;
|
|
|
|
while (index >= 0 && descaped.at(index) != '<')
|
|
|
|
index--;
|
|
|
|
|
|
|
|
descaped = descaped.left(index);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
#ifdef POXML_DEBUG
|
|
|
|
tqDebug("english \"%s\" ORIG \"%s\" %d(%d-%d) %d(%d-%d) %d %d TRANS \"%s\" %d '%s'", xml.latin1(), (*it).msgid.latin1(),
|
|
|
|
start_pos, bi.start_line, bi.start_col,
|
|
|
|
end_pos, bi.end_line, bi.end_col,
|
|
|
|
(*it).lines.first().offset,
|
|
|
|
(*it).end,
|
|
|
|
translations[(*it).msgid].latin1(), (*it).end,
|
|
|
|
descaped.latin1()
|
|
|
|
);
|
|
|
|
#endif
|
|
|
|
|
|
|
|
if ((*it).end) {
|
|
|
|
if (!(*it).lines.first().offset && end_pos != old_pos) {
|
|
|
|
assert(start_pos >= old_pos);
|
|
|
|
ts << xml_text.mid(old_pos, start_pos - old_pos);
|
|
|
|
}
|
|
|
|
assert((*it).end >= bi.offset);
|
|
|
|
ts << translate(xml.mid(bi.offset, (*it).end - bi.offset),
|
|
|
|
(*it).msgid, descaped);
|
|
|
|
old_pos = end_pos;
|
|
|
|
} else {
|
|
|
|
if (start_pos != old_pos) {
|
|
|
|
if (start_pos < old_pos) {
|
|
|
|
tqDebug("so far: '%s'", output.latin1());
|
|
|
|
}
|
|
|
|
assert(start_pos > old_pos);
|
|
|
|
ts << xml_text.mid(old_pos, start_pos - old_pos);
|
|
|
|
}
|
|
|
|
old_pos = end_pos;
|
|
|
|
ts << translate(xml,
|
|
|
|
(*it).msgid, descaped);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
ts << xml_text.mid(old_pos);
|
|
|
|
|
|
|
|
output.replace(TQRegExp("<trans_comment\\s*>"), "");
|
|
|
|
output.replace(TQRegExp("</trans_comment\\s*>"), "");
|
|
|
|
|
|
|
|
StructureParser::removeEmptyTags(output);
|
|
|
|
|
|
|
|
index = 0;
|
|
|
|
while (true) {
|
|
|
|
index = output.find(TQRegExp(">[^\n]"), index );
|
|
|
|
if ( index == -1 )
|
|
|
|
break;
|
|
|
|
if ( output.at( index - 1 ) == '/' || output.at( index - 1 ) == '-' ||
|
|
|
|
output.at( index - 1 ) == ']' || output.at( index - 1 ) == '?' )
|
|
|
|
index = index + 1;
|
|
|
|
else {
|
|
|
|
output.replace( index, 1, "\n>" );
|
|
|
|
index = index + 2;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
output = StructureParser::descapeLiterals(output);
|
|
|
|
|
|
|
|
cout << output.utf8().data();
|
|
|
|
return 0;
|
|
|
|
}
|