tdesdk/poxml/po2xml.cpp

 // #define POXML_DEBUG

#include "parser.h"
#include <stdlib.h>
#include <iostream>
#include <assert.h>
#include <tqregexp.h>

#include <fstream>
#include <vector>
#include "GettextLexer.hpp"
#include "GettextParser.hpp"
#include "antlr/AST.hpp"
#include "antlr/CommonAST.hpp"

using namespace std;

// Substitutes the first occurrence of `orig` inside `xml` with `translation`
// and returns the resulting fragment.
//
// As long as `xml` begins with a tag while `orig` does not, that leading tag
// (plus any spaces directly following it) is moved into a prefix which is
// re-attached, untouched, in front of the result.
//
// An empty `translation` leaves the fragment unchanged.
//
// Terminates the process with exit status 1 when a leading tag has no closing
// '>' or when `orig` cannot be found in the remaining fragment.
TQString translate(TQString xml, TQString orig, TQString translation)
{
    TQString prefix;
    while (xml.at(0) == '<' && orig.at(0) != '<') {
        // a XML tag as prefix
        int index = xml.find('>');
        if (index == -1) {
            // Without this check a release build (assert compiled out) would
            // never shrink `xml` and spin here forever.
            tqWarning("can't find the end of the leading tag in\n%s", xml.latin1());
            exit(1);
        }
        index++;
        while (xml.at(index) == ' ')
            index++;
        prefix = prefix + xml.left(index);
        xml = xml.mid(index, xml.length());
    }

    int index = xml.find(orig);
    if (index == -1) {
        tqWarning("can't find\n%s\nin\n%s", orig.latin1(), xml.latin1());
        exit(1);
    }
    if (!translation.isEmpty())
        xml.replace(index, orig.length(), translation);
    return prefix + xml;
}

int main( int argc, char **argv )
{
    if (argc != 3) {
        tqWarning("usage: %s english-XML translated-PO", argv[0]);
        ::exit(1);
    }

    MsgList english = parseXML(argv[1]);
    MsgList translated;

    try {
        ifstream s(argv[2]);
        GettextLexer lexer(s);
        GettextParser parser(lexer);
        translated = parser.file();

    } catch(exception& e) {
        cerr << "exception: " << e.what() << endl;
        return 1;
    }

    TQMap<TQString, TQString> translations;
    for (MsgList::ConstIterator it = translated.begin();
         it != translated.end(); ++it)
    {
        TQString msgstr;
        TQString msgid = escapePO((*it).msgid);
        if ((*it).comment.find("fuzzy") < 0)
            msgstr = escapePO((*it).msgstr);

#ifdef POXML_DEBUG
        tqDebug("inserting translations '%s' -> '%s'", msgid.latin1(),msgstr.latin1());
#endif
        translations.insert(msgid, msgstr);
    }

    TQFile xml(argv[1]);
    xml.open(IO_ReadOnly);
    TQTextStream ds(&xml);
    ds.setEncoding(TQTextStream::UnicodeUTF8);
    TQString xml_text = ds.read();
    xml.close();
    TQString output;
    TQTextStream ts(&output, IO_WriteOnly);
    StructureParser::cleanupTags(xml_text);

    TQValueList<int> line_offsets;
    line_offsets.append(0);
    int index = 0;
    while (true) {
        index = xml_text.find('\n', index) + 1;
        if (index <= 0)
            break;
        line_offsets.append(index);
    }

    int old_start_line = -1, old_start_col = -1;
    TQString old_text;
    MsgList::Iterator old_it = english.end();

    for (MsgList::Iterator it = english.begin();
         it != english.end(); ++it)
    {
        BlockInfo bi = (*it).lines.first();
        int start_pos = line_offsets[bi.start_line - 1] + bi.start_col;
        if (!bi.end_line)
            continue;
        int end_pos = line_offsets[bi.end_line - 1] + bi.end_col - 1;

        (*it).start = start_pos;
        if (old_start_line == bi.start_line &&
            old_start_col == bi.start_col)
        {
            (*old_it).end = bi.offset;
            (*it).end = end_pos;
        } else {
            (*it).lines.first().offset = 0;
            (*it).end = 0;
        }

        old_start_line = bi.start_line;
        old_start_col = bi.start_col;
        old_it = it;
    }

    int old_pos = 0;

    for (MsgList::Iterator it = english.begin();
         it != english.end(); ++it)
    {
        BlockInfo bi = (*it).lines.first();
        int start_pos = line_offsets[bi.start_line - 1] + bi.start_col;
        if (!bi.end_line)
            continue;
        int end_pos = line_offsets[bi.end_line - 1] + bi.end_col - 1;

        TQString xml = xml_text.mid(start_pos, end_pos - start_pos);
        int index = 0;
        while (true) {
            index = xml.find("<!--");
            if (index == -1)
                break;
            int end_index = index + 4;
            while (xml.at(end_index) != '>' ||
                   xml.at(end_index-1) != '-' ||
                   xml.at(end_index-2) != '-')
            {
                end_index++;
            }
            xml.replace(index, end_index + 1 - index, " ");
            index = end_index;
        }
        StructureParser::descape(xml);

        TQString descaped = StructureParser::descapeLiterals((*it).msgid);
        if (translations.contains(descaped))
            descaped = translations[descaped];

#ifdef POXML_DEBUG
        // assert(!descaped.isEmpty());
#endif

        if ((*it).msgid.at(0) == '<' &&  StructureParser::isClosure((*it).msgid)) {
            // if the id starts with a tag, then we remembered the
            // correct line information and need to strip the target
            // now, so it fits
            int index = 0;
            while ((*it).msgid.at(index) != '>')
                index++;
            index++;
            while ((*it).msgid.at(index) == ' ')
                index++;
            TQString omsgid = (*it).msgid;
            (*it).msgid = (*it).msgid.mid(index);

            index = (*it).msgid.length() - 1;
            while ((*it).msgid.at(index) != '<')
                index--;

            (*it).msgid = (*it).msgid.left(index);

            if (!descaped.isEmpty()) {
                if (descaped.at(0) != '<') {
                    tqWarning("the translation of '%s' doesn't start with a tag.", omsgid.latin1());
                    exit(1);
                }
                index = 0;
                while (index <= (int)descaped.length() && descaped.at(index) != '>')
                    index++;
                index++;
                while (descaped.at(index) == ' ')
                    index++;
                descaped = descaped.mid(index);

                index = descaped.length() - 1;
                while (index >= 0 && descaped.at(index) != '<')
                    index--;

                descaped = descaped.left(index);
            }
        }

#ifdef POXML_DEBUG
        tqDebug("english \"%s\" ORIG \"%s\" %d(%d-%d) %d(%d-%d) %d %d TRANS \"%s\" %d '%s'", xml.latin1(), (*it).msgid.latin1(),
               start_pos, bi.start_line, bi.start_col,
               end_pos, bi.end_line, bi.end_col,
               (*it).lines.first().offset,
               (*it).end,
               translations[(*it).msgid].latin1(), (*it).end,
               descaped.latin1()
            );
#endif

        if ((*it).end) {
            if (!(*it).lines.first().offset && end_pos != old_pos) {
                assert(start_pos >= old_pos);
                ts << xml_text.mid(old_pos, start_pos - old_pos);
            }
            assert((*it).end >= bi.offset);
            ts << translate(xml.mid(bi.offset, (*it).end - bi.offset),
                            (*it).msgid, descaped);
            old_pos = end_pos;
        } else {
            if (start_pos != old_pos) {
		if (start_pos < old_pos) {
			tqDebug("so far: '%s'", output.latin1());
		}
                assert(start_pos > old_pos);
                ts << xml_text.mid(old_pos, start_pos - old_pos);
            }
            old_pos = end_pos;
            ts << translate(xml,
                            (*it).msgid, descaped);
        }
    }

    ts << xml_text.mid(old_pos);

    output.replace(TQRegExp("<trans_comment\\s*>"), "");
    output.replace(TQRegExp("</trans_comment\\s*>"), "");

    StructureParser::removeEmptyTags(output);

    index = 0;
    while (true) {
        index = output.find(TQRegExp(">[^\n]"), index );
        if ( index == -1 )
            break;
        if ( output.at( index - 1 ) == '/' || output.at( index - 1 ) == '-' ||
             output.at( index - 1 ) == ']' || output.at( index - 1 ) == '?' )
            index = index + 1;
        else {
            output.replace( index, 1, "\n>" );
            index = index + 2;
        }
    }
    output = StructureParser::descapeLiterals(output);

    cout << output.utf8().data();
    return 0;
}