kbibtex/src/encoderlatex.cpp

/***************************************************************************
*   Copyright (C) 2004-2009 by Thomas Fischer                             *
*   fischer@unix-ag.uni-kl.de                                             *
*                                                                         *
*   This program is free software; you can redistribute it and/or modify  *
*   it under the terms of the GNU General Public License as published by  *
*   the Free Software Foundation; either version 2 of the License, or     *
*   (at your option) any later version.                                   *
*                                                                         *
*   This program is distributed in the hope that it will be useful,       *
*   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
*   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
*   GNU General Public License for more details.                          *
*                                                                         *
*   You should have received a copy of the GNU General Public License     *
*   along with this program; if not, write to the                         *
*   Free Software Foundation, Inc.,                                       *
*   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.             *
***************************************************************************/
#include <tqstring.h>
#include <tqapplication.h>
#include <tqregexp.h>

#include "encoderlatex.h"

namespace BibTeX
{
    /** Shared encoder instance; created on first use by currentEncoderLaTeX()
      * and released again by deleteCurrentEncoderLaTeX(). */
    EncoderLaTeX *EncoderLaTeX::encoderLaTeX = NULL;

    /**
     * Maps Unicode combining diacritical marks to the LaTeX accent
     * command (without the leading backslash) producing the same accent.
     * buildCombinedMapping() turns every row into a regular expression
     * matching "any character followed by this combining mark".
     * Rows which are commented out have no LaTeX command assigned.
     */
    static struct Decomposition
    {
        /** LaTeX accent command, without the leading backslash */
        const char *latexCommand;
        /** code point of the combining mark */
        unsigned int unicode;
    }
    decompositions[] =
    {
        {"`", 0x0300},
        {"'", 0x0301},
        {"^", 0x0302},
        {"~", 0x0303},
        {"=", 0x0304},
        /*{"x", 0x0305},  OVERLINE */
        {"u", 0x0306},
        {".", 0x0307},
        /*{"x", 0x0309},  HOOK ABOVE */
        {"r", 0x030a},
        {"H", 0x030b},
        {"v", 0x030c},
        /*{"x", 0x030d},  VERTICAL LINE ABOVE */
        /*{"x", 0x030e},  DOUBLE VERTICAL LINE ABOVE */
        /*{"x", 0x030f},  DOUBLE GRAVE ACCENT */
        /*{"x", 0x0310},  CANDRABINDU */
        /*{"x", 0x0311},  INVERTED BREVE */
        /*{"x", 0x0312},  TURNED COMMA ABOVE */
        /*{"x", 0x0313},  COMMA ABOVE */
        /*{"x", 0x0314},  REVERSED COMMA ABOVE */
        /*{"x", 0x0315},   */
        /*{"x", 0x0316},   */
        /*{"x", 0x0317},   */
        /*{"x", 0x0318},   */
        /*{"x", 0x0319},   */
        /*{"x", 0x031a},   */
        /*{"x", 0x031b},   */
        /*{"x", 0x031c},   */
        /*{"x", 0x031d},   */
        /*{"x", 0x031e},   */
        /*{"x", 0x031f},   */
        /*{"x", 0x0320},   */
        /*{"x", 0x0321},   */
        /*{"x", 0x0322},   */
        {"d", 0x0323},
        /*{"x", 0x0324},   */
        /*{"x", 0x0325},   */
        /*{"x", 0x0326},   */
        /* U+0327 is COMBINING CEDILLA, i.e. \c; \d is the dot below (U+0323) */
        {"c", 0x0327},
        {"k", 0x0328},
        /*{"x", 0x0329},   */
        /*{"x", 0x032a},   */
        /*{"x", 0x032b},   */
        /*{"x", 0x032c},   */
        /*{"x", 0x032d},   */
        /*{"x", 0x032e},   */
        /*{"x", 0x032f},   */
        {"b", 0x0331},
        {"t", 0x0361}
    };

    /** Number of rows in decompositions */
    static const int decompositionscount = sizeof( decompositions ) / sizeof( decompositions[ 0 ] );

    /**
     * Maps LaTeX commands consisting of letters only (stored without the
     * leading backslash) to the single Unicode character they stand for.
     * buildCharMapping() combines every row with each pattern in
     * expansionsCmd to obtain the regular expressions used for decoding.
     */
    static const struct EncoderLaTeXCommandMapping
    {
        /** command name without the leading backslash */
        const char *letters;
        /** code point of the corresponding character */
        unsigned int unicode;
    }
    commandmappingdatalatex[] =
    {
        {"AA", 0x00C5},
        {"AE", 0x00C6},
        {"ss", 0x00DF},
        {"aa", 0x00E5},
        {"ae", 0x00E6},
        {"OE", 0x0152},
        {"oe", 0x0153},
        {"ldots", 0x2026},
        {"L", 0x0141},
        {"l", 0x0142},
        {"grqq", 0x201C},
        {"glqq", 0x201E},
        {"frqq", 0x00BB},
        {"flqq", 0x00AB},

        /* Cyrillic commands; generated from LaTeX's t2aenc.dfu with the
           command below (reverse-sorted, so longer names precede their prefixes) */
// awk -F '[{}\\\\]+' '/DeclareUnicodeCharacter/ { print "{\""$4"\", 0x"$3"},"}' /usr/share/texmf-dist/tex/latex/base/t2aenc.dfu | grep '0x04' | sort -r -f
        {"cyrzhdsc", 0x0497},
        {"CYRZHDSC", 0x0496},
        {"cyrzh", 0x0436},
        {"CYRZH", 0x0416},
        {"cyrzdsc", 0x0499},
        {"CYRZDSC", 0x0498},
        {"cyrz", 0x0437},
        {"CYRZ", 0x0417},
        {"cyryu", 0x044E},
        {"CYRYU", 0x042E},
        {"cyryo", 0x0451},
        {"CYRYO", 0x0401},
        {"cyryi", 0x0457},
        {"CYRYI", 0x0407},
        {"cyryhcrs", 0x04B1},
        {"CYRYHCRS", 0x04B0},
        {"cyrya", 0x044F},
        {"CYRYA", 0x042F},
        {"cyry", 0x04AF},
        {"CYRY", 0x04AE},
        {"cyrv", 0x0432},
        {"CYRV", 0x0412},
        {"cyrushrt", 0x045E},
        {"CYRUSHRT", 0x040E},
        {"cyru", 0x0443},
        {"CYRU", 0x0423},
        {"cyrtshe", 0x045B},
        {"CYRTSHE", 0x040B},
        {"cyrtdsc", 0x04AD},
        {"CYRTDSC", 0x04AC},
        {"cyrt", 0x0442},
        {"CYRT", 0x0422},
        {"cyrshha", 0x04BB},
        {"CYRSHHA", 0x04BA},
        {"cyrshch", 0x0449},
        {"CYRSHCH", 0x0429},
        {"cyrsh", 0x0448},
        {"CYRSH", 0x0428},
        {"cyrsftsn", 0x044C},
        {"CYRSFTSN", 0x042C},
        {"cyrsdsc", 0x04AB},
        {"CYRSDSC", 0x04AA},
        {"cyrschwa", 0x04D9},
        {"CYRSCHWA", 0x04D8},
        {"cyrs", 0x0441},
        {"CYRS", 0x0421},
        {"cyrr", 0x0440},
        {"CYRR", 0x0420},
        {"CYRpalochka", 0x04C0},
        {"cyrp", 0x043F},
        {"CYRP", 0x041F},
        {"cyrotld", 0x04E9},
        {"CYROTLD", 0x04E8},
        {"cyro", 0x043E},
        {"CYRO", 0x041E},
        {"cyrnje", 0x045A},
        {"CYRNJE", 0x040A},
        {"cyrng", 0x04A5},
        {"CYRNG", 0x04A4},
        {"cyrndsc", 0x04A3},
        {"CYRNDSC", 0x04A2},
        {"cyrn", 0x043D},
        {"CYRN", 0x041D},
        {"cyrm", 0x043C},
        {"CYRM", 0x041C},
        {"cyrlje", 0x0459},
        {"CYRLJE", 0x0409},
        {"cyrl", 0x043B},
        {"CYRL", 0x041B},
        {"cyrkvcrs", 0x049D},
        {"CYRKVCRS", 0x049C},
        {"cyrkdsc", 0x049B},
        {"CYRKDSC", 0x049A},
        {"cyrk", 0x043A},
        {"CYRK", 0x041A},
        {"cyrje", 0x0458},
        {"CYRJE", 0x0408},
        {"cyrishrt", 0x0439},
        {"CYRISHRT", 0x0419},
        {"cyrii", 0x0456},
        {"CYRII", 0x0406},
        {"cyrie", 0x0454},
        {"CYRIE", 0x0404},
        {"cyri", 0x0438},
        {"CYRI", 0x0418},
        {"cyrhrdsn", 0x044A},
        {"CYRHRDSN", 0x042A},
        {"cyrhdsc", 0x04B3},
        {"CYRHDSC", 0x04B2},
        {"cyrh", 0x0445},
        {"CYRH", 0x0425},
        {"cyrgup", 0x0491},
        {"CYRGUP", 0x0490},
        {"cyrghcrs", 0x0493},
        {"CYRGHCRS", 0x0492},
        {"cyrg", 0x0433},
        {"CYRG", 0x0413},
        {"cyrf", 0x0444},
        {"CYRF", 0x0424},
        {"cyrery", 0x044B},
        {"CYRERY", 0x042B},
        {"cyrerev", 0x044D},
        {"CYREREV", 0x042D},
        {"cyre", 0x0435},
        {"CYRE", 0x0415},
        {"cyrdzhe", 0x045F},
        {"CYRDZHE", 0x040F},
        {"cyrdze", 0x0455},
        {"CYRDZE", 0x0405},
        {"cyrdje", 0x0452},
        {"CYRDJE", 0x0402},
        {"cyrd", 0x0434},
        {"CYRD", 0x0414},
        {"cyrchvcrs", 0x04B9},
        {"CYRCHVCRS", 0x04B8},
        {"cyrchrdsc", 0x04B7},
        {"CYRCHRDSC", 0x04B6},
        {"cyrch", 0x0447},
        {"CYRCH", 0x0427},
        {"cyrc", 0x0446},
        {"CYRC", 0x0426},
        {"cyrb", 0x0431},
        {"CYRB", 0x0411},
        {"cyrae", 0x04D5},
        {"CYRAE", 0x04D4},
        {"cyra", 0x0430},
        {"CYRA", 0x0410}
    };

    /** Number of rows in commandmappingdatalatex */
    static const int commandmappingdatalatexcount = sizeof( commandmappingdatalatex ) / sizeof( commandmappingdatalatex[ 0 ] ) ;

    /**
     * Regular-expression templates describing how a letter-only LaTeX
     * command may show up in text; %1 is replaced by the command name.
     * In order of appearance the templates match:
     *   - the command enclosed in braces: {\cmd}
     *   - the command terminated by an empty group: \cmd{}
     *   - the command followed by a line feed, a carriage return, a
     *     backslash (start of the next command) or a closing brace; the
     *     follower is captured so that buildCharMapping() can put it
     *     back via "\1"
     *   - the command followed by one whitespace character, which is
     *     part of the match
     **/
    const char *expansionsCmd[] =
    {
        "\\{\\\\%1\\}",
        "\\\\%1\\{\\}",
        "\\\\%1(\\n|\\r|\\\\|\\})",
        "\\\\%1\\s"
    };

    /** Number of templates in expansionsCmd */
    static const int expansionscmdcount = sizeof( expansionsCmd ) / sizeof( *expansionsCmd );

    /**
     * Maps a LaTeX accent command ("modifier") applied to a letter to the
     * precomposed Unicode character.
     *
     * The modifier is stored as a regular-expression fragment (hence the
     * doubled backslashes); buildCharMapping() derives the plain LaTeX
     * spelling from it. Rows are ordered by code point; placeholder
     * comments mark code points without a mapping.
     *
     * Row order matters when several rows share a code point: encoding
     * uses the first row, decoding accepts all of them.
     */
    static const struct EncoderLaTeXModCharMapping
    {
        /** accent command as regular-expression fragment */
        const char *modifier;
        /** letter the accent is applied to (regular-expression fragment) */
        const char *letter;
        /** code point of the resulting character */
        unsigned int unicode;
    }
    modcharmappingdatalatex[] =
    {
        {"\\\\`", "A", 0x00C0},
        {"\\\\'", "A", 0x00C1},
        {"\\\\\\^", "A", 0x00C2},
        {"\\\\~", "A", 0x00C3},
        {"\\\\\"", "A", 0x00C4},
        {"\\\\r", "A", 0x00C5},
        /** 0x00C6 */
        {"\\\\c", "C", 0x00C7},
        {"\\\\`", "E", 0x00C8},
        {"\\\\'", "E", 0x00C9},
        {"\\\\\\^", "E", 0x00CA},
        {"\\\\\"", "E", 0x00CB},
        {"\\\\`", "I", 0x00CC},
        {"\\\\'", "I", 0x00CD},
        {"\\\\\\^", "I", 0x00CE},
        {"\\\\\"", "I", 0x00CF},
        /** 0x00D0 */
        {"\\\\~", "N", 0x00D1},
        {"\\\\`", "O", 0x00D2},
        {"\\\\'", "O", 0x00D3},
        {"\\\\\\^", "O", 0x00D4},
        /** 0x00D5 */
        {"\\\\\"", "O", 0x00D6},
        /** 0x00D7 */
        {"\\\\", "O", 0x00D8},
        {"\\\\`", "U", 0x00D9},
        {"\\\\'", "U", 0x00DA},
        {"\\\\\\^", "U", 0x00DB},
        {"\\\\\"", "U", 0x00DC},
        {"\\\\'", "Y", 0x00DD},
        /** 0x00DE */
        {"\\\\\"", "s", 0x00DF},
        {"\\\\`", "a", 0x00E0},
        {"\\\\'", "a", 0x00E1},
        {"\\\\\\^", "a", 0x00E2},
        {"\\\\~", "a", 0x00E3},
        {"\\\\\"", "a", 0x00E4},
        {"\\\\r", "a", 0x00E5},
        /** 0x00E6 */
        {"\\\\c", "c", 0x00E7},
        {"\\\\`", "e", 0x00E8},
        {"\\\\'", "e", 0x00E9},
        {"\\\\\\^", "e", 0x00EA},
        {"\\\\\"", "e", 0x00EB},
        {"\\\\`", "i", 0x00EC},
        {"\\\\'", "i", 0x00ED},
        {"\\\\'", "\\\\i", 0x00ED},
        {"\\\\\\^", "i", 0x00EE},
        /** 0x00EF */
        /** 0x00F0 */
        {"\\\\~", "n", 0x00F1},
        {"\\\\`", "o", 0x00F2},
        {"\\\\'", "o", 0x00F3},
        {"\\\\\\^", "o", 0x00F4},
        /** 0x00F5 */
        {"\\\\\"", "o", 0x00F6},
        /** 0x00F7 */
        {"\\\\", "o", 0x00F8},
        {"\\\\`", "u", 0x00F9},
        {"\\\\'", "u", 0x00FA},
        {"\\\\\\^", "u", 0x00FB},
        {"\\\\\"", "u", 0x00FC},
        {"\\\\'", "y", 0x00FD},
        /** 0x00FE */
        /** 0x00FF */
        /** 0x0100 */
        /** 0x0101 */
        {"\\\\u", "A", 0x0102},
        {"\\\\u", "a", 0x0103},
        /** 0x0104 */
        /** 0x0105 */
        {"\\\\'", "C", 0x0106},
        {"\\\\'", "c", 0x0107},
        /** 0x0108 */
        /** 0x0109 */
        /** 0x010A */
        /** 0x010B */
        {"\\\\v", "C", 0x010C},
        {"\\\\v", "c", 0x010D},
        {"\\\\v", "D", 0x010E},
        /** 0x010F */
        /** 0x0110 */
        /** 0x0111 */
        /** 0x0112 */
        /** 0x0113 */
        /** 0x0114 */
        /** 0x0115 */
        /** 0x0116 */
        /** 0x0117 */
        /* U+0118/U+0119 carry an ogonek, which LaTeX writes as \k.
           The \k rows come first so that encoding emits \k{E}. */
        {"\\\\k", "E", 0x0118},
        {"\\\\k", "e", 0x0119},
        /* Legacy rows: earlier versions wrote these characters as \c{E}
           (cedilla); kept so that such files still decode. */
        {"\\\\c", "E", 0x0118},
        {"\\\\c", "e", 0x0119},
        {"\\\\v", "E", 0x011A},
        {"\\\\v", "e", 0x011B},
        /** 0x011C */
        /** 0x011D */
        {"\\\\u", "G", 0x011E},
        {"\\\\u", "g", 0x011F},
        /** 0x0120 */
        /** 0x0121 */
        /** 0x0122 */
        /** 0x0123 */
        /** 0x0124 */
        /** 0x0125 */
        /** 0x0126 */
        /** 0x0127 */
        /** 0x0128 */
        /** 0x0129 */
        /** 0x012A */
        /** 0x012B */
        {"\\\\u", "I", 0x012C},
        {"\\\\u", "i", 0x012D},
        /** 0x012E */
        /** 0x012F */
        /** 0x0130 */
        /** 0x0131 */
        /** 0x0132 */
        /** 0x0133 */
        /** 0x0134 */
        /** 0x0135 */
        /** 0x0136 */
        /** 0x0137 */
        /** 0x0138 */
        {"\\\\'", "L", 0x0139},
        {"\\\\'", "l", 0x013A},
        /** 0x013B */
        /** 0x013C */
        /** 0x013D */
        /** 0x013E */
        /** 0x013F */
        /** 0x0140 */
        /** 0x0141 */
        /** 0x0142 */
        {"\\\\'", "N", 0x0143},
        {"\\\\'", "n", 0x0144},
        /** 0x0145 */
        /** 0x0146 */
        {"\\\\v", "N", 0x0147},
        {"\\\\v", "n", 0x0148},
        /** 0x0149 */
        /** 0x014A */
        /** 0x014B */
        /** 0x014C */
        /** 0x014D */
        {"\\\\u", "O", 0x014E},
        {"\\\\u", "o", 0x014F},
        {"\\\\H", "O", 0x0150},
        {"\\\\H", "o", 0x0151},
        /** 0x0152 */
        /** 0x0153 */
        {"\\\\'", "R", 0x0154},
        {"\\\\'", "r", 0x0155},
        /** 0x0156 */
        /** 0x0157 */
        {"\\\\v", "R", 0x0158},
        {"\\\\v", "r", 0x0159},
        {"\\\\'", "S", 0x015A},
        {"\\\\'", "s", 0x015B},
        /** 0x015C */
        /** 0x015D */
        {"\\\\c", "S", 0x015E},
        {"\\\\c", "s", 0x015F},
        {"\\\\v", "S", 0x0160},
        {"\\\\v", "s", 0x0161},
        /** 0x0162 */
        /** 0x0163 */
        {"\\\\v", "T", 0x0164},
        /** 0x0165 */
        /** 0x0166 */
        /** 0x0167 */
        /** 0x0168 */
        /** 0x0169 */
        /** 0x016A */
        /** 0x016B */
        {"\\\\u", "U", 0x016C},
        {"\\\\u", "u", 0x016D},
        {"\\\\r", "U", 0x016E},
        {"\\\\r", "u", 0x016F},
        /** 0x0170 */
        /** 0x0171 */
        /** 0x0172 */
        /** 0x0173 */
        /** 0x0174 */
        /** 0x0175 */
        /** 0x0176 */
        /** 0x0177 */
        {"\\\\\"", "Y", 0x0178},
        {"\\\\'", "Z", 0x0179},
        {"\\\\'", "z", 0x017A},
        /** 0x017B */
        /** 0x017C */
        {"\\\\v", "Z", 0x017D},
        {"\\\\v", "z", 0x017E},
        /** 0x017F */
        /** 0x0180 */
        {"\\\\v", "A", 0x01CD},
        {"\\\\v", "a", 0x01CE},
        {"\\\\v", "G", 0x01E6},
        {"\\\\v", "g", 0x01E7}
    };

    /**
     * Regular-expression templates for accented letters where modifier
     * (%1) and letter (%2) are set apart, either by braces around the
     * letter or by a space inside an enclosing group.
     */
    const char *expansionsMod1[] =
    {
        "\\{%1\\{%2\\}\\}",
        "\\{%1 %2\\}",
        "%1\\{%2\\}"
    };
    static const int expansionsmod1count = sizeof( expansionsMod1 ) / sizeof( *expansionsMod1 );

    /**
     * Regular-expression templates for accented letters where the letter
     * (%2) follows the modifier (%1) immediately; buildCharMapping() uses
     * them only for modifiers that do not end in a letter.
     */
    const char *expansionsMod2[] =
    {
        "\\{%1%2\\}",
        "%1%2\\{\\}",
        "%1%2"
    };
    static const int expansionsmod2count = sizeof( expansionsMod2 ) / sizeof( *expansionsMod2 );

    /** Number of rows in modcharmappingdatalatex */
    static const int modcharmappingdatalatexcount = sizeof( modcharmappingdatalatex ) / sizeof( *modcharmappingdatalatex );

    /**
     * Escaped characters and digraphs that map directly to one Unicode
     * character. Each row carries the regular expression used when
     * decoding and the literal LaTeX text written when encoding.
     */
    static const struct EncoderLaTeXCharMapping
    {
        /** regular expression matching the LaTeX spelling */
        const char *regexp;
        /** code point of the character */
        unsigned int unicode;
        /** LaTeX spelling emitted for the character */
        const char *latex;
    }
    charmappingdatalatex[] =
    {
        { "\\\\#", 0x0023, "\\#" },
        { "\\\\&", 0x0026, "\\&" },
        { "\\\\_", 0x005F, "\\_" },
        { "!`",    0x00A1, "!`" },
        { "\"<",   0x00AB, "\"<" },
        { "\">",   0x00BB, "\">" },
        { "[?]`",  0x00BF, "?`" },
        { "--",    0x2013, "--" }
    };

    /** Number of rows in charmappingdatalatex */
    static const int charmappingdatalatexcount = sizeof( charmappingdatalatex ) / sizeof( *charmappingdatalatex );

    /** Sets up the encoder by filling both lookup lists. */
    EncoderLaTeX::EncoderLaTeX()
    {
        /** character, command and accent rules */
        buildCharMapping();

        /** rules for combining diacritical marks */
        buildCombinedMapping();
    }

    /** Destructor; performs no explicit cleanup. */
    EncoderLaTeX::~EncoderLaTeX()
    {
        /** intentionally empty */
    }

    /**
     * Decode LaTeX-encoded text into Unicode.
     *
     * Decomposed characters (base character plus combining mark) are
     * first rewritten as LaTeX accent commands. The text is then split at
     * '$' into plain and inline-math regions; the replacement rules in
     * m_charMapping are applied to the plain regions only. Finally, URLs
     * are put back in the spelling they had in the input.
     *
     * @param text LaTeX-encoded text
     * @return text with LaTeX sequences replaced by Unicode characters
     */
    TQString EncoderLaTeX::decode( const TQString & text )
    {
        const TQString splitMarker = "|KBIBTEX|";

        /** start-stop marker ensures that each text starts and stops
          * with plain text and not with an inline math environment.
          * This invariant is exploited implicitly in the code below. */
        const TQString startStopMarker="|STARTSTOP|";
        TQString result = startStopMarker + text + startStopMarker;

        /** Collect (all?) urls from the BibTeX file and store them in urls */
        /** Problem is that the replace function below will replace
          * character sequences in the URL rendering the URL invalid.
          * Later, all URLs will be replaced back to their original
          * in the hope nothing breaks ... */
        TQStringList urls;
        TQRegExp httpRegExp( "(ht|f)tp://[^\"} ]+" );
        httpRegExp.setMinimal( false );
        int pos = 0;
        while ( pos >= 0 )
        {
            pos = httpRegExp.search( result, pos );
            if ( pos >= 0 )
            {
                ++pos;
                TQString url = httpRegExp.cap( 0 );
                urls << url;
            }
        }

        decomposedUTF8toLaTeX( result );

        /** split text into math and non-math regions */
        TQStringList intermediate = TQStringList::split( '$', result, true );
        TQStringList::Iterator it = intermediate.begin();
        while ( it != intermediate.end() )
        {
            /**
             * Sometimes we split strings like "\$", which is not intended.
             * So, we have to manually fix things by checking for strings
             * ending with "\" and append both the removed dollar sign and
             * the following string (which was never supposed to be an
             * independent string). Finally, we remove the unnecessary
             * string and continue.
             */
            if (( *it ).endsWith( "\\" ) )
            {
                TQStringList::Iterator cur = it;
                ++it;
                ( *cur ).append( '$' ).append( *it );
                intermediate.remove( it );
                it = cur;
            }
            else
                ++it;
        }

        tqApp->processEvents();

        /** join the plain-text regions (every other list entry) with the
          * split marker; math regions are only checked for their length */
        result = "";
        for ( TQStringList::Iterator it = intermediate.begin(); it != intermediate.end(); ++it )
        {
            if ( !result.isEmpty() ) result.append( splitMarker );
            result.append( *it );

            ++it;
            if ( it == intermediate.end() )
                break;

            if (( *it ).length() > 256 )
                tqDebug( "Very long math equation using $ found, maybe due to broken inline math: %s", ( *it ).left( 48 ).latin1() );
        }

        tqApp->processEvents();

        /** apply every decoding rule to the joined plain text */
        for ( TQValueList<CharMappingItem>::ConstIterator cmit = m_charMapping.begin(); cmit != m_charMapping.end(); ++cmit )
            result.replace(( *cmit ).regExp, ( *cmit ).unicode );

        tqApp->processEvents();

        TQStringList transformed = TQStringList::split( splitMarker, result, true );

        tqApp->processEvents();

        /** interleave decoded plain text with the untouched math regions */
        result = "";
        for ( TQStringList::Iterator itt = transformed.begin(), iti = intermediate.begin(); itt != transformed.end() && iti != intermediate.end(); ++itt, ++iti )
        {
            result.append( *itt );

            ++iti;
            if ( iti == intermediate.end() )
                break;

            result.append( "$" ).append( *iti ).append( "$" );
        }

        tqApp->processEvents();

        /** Reinserting original URLs as explained above */
        /** The loop is bounded by the number of collected URLs: should the
          * transformed text contain more URL matches than the input did,
          * the surplus matches are left as they are instead of reading
          * beyond the end of the list. */
        pos = 0;
        int idx = 0;
        const int urlCount = static_cast<int>( urls.count() );
        while ( pos >= 0 && idx < urlCount )
        {
            pos = httpRegExp.search( result, pos );
            if ( pos >= 0 )
            {
                ++pos;
                int len = httpRegExp.cap( 0 ).length();
                result = result.left( pos - 1 ).append( urls[idx++] ).append( result.mid( pos + len - 1 ) );
            }
        }

        return result.replace( startStopMarker,"" );
    }

    /**
     * Encode Unicode text as LaTeX.
     *
     * The text is split at '$' into plain and inline-math regions; in the
     * plain regions every character known to m_charMapping is replaced by
     * its LaTeX spelling. Afterwards decomposed characters (base character
     * plus combining mark) are rewritten as accent commands and URLs are
     * put back in the spelling they had in the input.
     *
     * @param text Unicode text
     * @return LaTeX-encoded text
     */
    TQString EncoderLaTeX::encode( const TQString & text )
    {
        const TQString splitMarker = "|KBIBTEX|";

        /** start-stop marker ensures that each text starts and stops
          * with plain text and not with an inline math environment.
          * This invariant is exploited implicitly in the code below. */
        const TQString startStopMarker="|STARTSTOP|";
        TQString result = startStopMarker + text + startStopMarker;

        /** Collect (all?) urls from the BibTeX file and store them in urls */
        /** Problem is that the replace function below will replace
          * character sequences in the URL rendering the URL invalid.
          * Later, all URLs will be replaced back to their original
          * in the hope nothing breaks ... */
        TQStringList urls;
        TQRegExp httpRegExp( "(ht|f)tp://[^\"} ]+" );
        httpRegExp.setMinimal( false );
        int pos = 0;
        while ( pos >= 0 )
        {
            pos = httpRegExp.search( result, pos );
            if ( pos >= 0 )
            {
                ++pos;
                TQString url = httpRegExp.cap( 0 );
                urls << url;
            }
        }

        /** split text into math and non-math regions */
        TQStringList intermediate = TQStringList::split( '$', result, true );
        TQStringList::Iterator it = intermediate.begin();
        while ( it != intermediate.end() )
        {
            /**
             * Sometimes we split strings like "\$", which is not intended.
             * So, we have to manually fix things by checking for strings
             * ending with "\" and append both the removed dollar sign and
             * the following string (which was never supposed to be an
             * independent string). Finally, we remove the unnecessary
             * string and continue.
             */
            if (( *it ).endsWith( "\\" ) )
            {
                TQStringList::Iterator cur = it;
                ++it;
                ( *cur ).append( '$' ).append( *it );
                intermediate.remove( it );
                it = cur;
            }
            else
                ++it;
        }

        tqApp->processEvents();

        /** join the plain-text regions (every other list entry) with the
          * split marker; math regions are only checked for their length */
        result = "";
        for ( TQStringList::Iterator it = intermediate.begin(); it != intermediate.end(); ++it )
        {
            if ( !result.isEmpty() ) result.append( splitMarker );
            result.append( *it );

            ++it;
            if ( it == intermediate.end() )
                break;

            if (( *it ).length() > 256 )
                tqDebug( "Very long math equation using $ found, maybe due to broken inline math: %s", ( *it ).left( 48 ).latin1() );
        }

        tqApp->processEvents();

        /** apply every encoding rule to the joined plain text */
        for ( TQValueList<CharMappingItem>::ConstIterator cmit = m_charMapping.begin(); cmit != m_charMapping.end(); ++cmit )
            result.replace(( *cmit ).unicode, ( *cmit ).latex );

        tqApp->processEvents();

        TQStringList transformed = TQStringList::split( splitMarker, result, true );

        tqApp->processEvents();

        /** interleave encoded plain text with the untouched math regions */
        result = "";
        for ( TQStringList::Iterator itt = transformed.begin(), iti = intermediate.begin(); itt != transformed.end() && iti != intermediate.end(); ++itt, ++iti )
        {
            result.append( *itt );

            ++iti;
            if ( iti == intermediate.end() )
                break;

            result.append( "$" ).append( *iti ).append( "$" );
        }

        tqApp->processEvents();

        /** \url accepts unquoted & and _
           May introduce new problems, though: the replacement affects
           the whole text, not only the argument of \url */
        if ( result.contains( "\\url{" ) )
            result.replace( "\\&", "&" ).replace( "\\_", "_" ).replace( TQChar( 0x2013 ), "--" ).replace( "\\#", "#" );

        decomposedUTF8toLaTeX( result );

        /** Reinserting original URLs as explained above */
        /** The loop is bounded by the number of collected URLs: should the
          * transformed text contain more URL matches than the input did,
          * the surplus matches are left as they are instead of reading
          * beyond the end of the list. */
        pos = 0;
        int idx = 0;
        const int urlCount = static_cast<int>( urls.count() );
        while ( pos >= 0 && idx < urlCount )
        {
            pos = httpRegExp.search( result, pos );
            if ( pos >= 0 )
            {
                ++pos;
                int len = httpRegExp.cap( 0 ).length();
                result = result.left( pos - 1 ).append( urls[idx++] ).append( result.mid( pos + len - 1 ) );
            }
        }

        return result.replace( startStopMarker,"" );
    }

    /**
     * Encode a single character only: occurrences of @p replace in
     * @p text are substituted by the LaTeX spelling of every rule in
     * m_charMapping whose Unicode side equals @p replace.
     *
     * @param text text to process
     * @param replace character to encode
     * @return copy of @p text with the substitutions applied
     */
    TQString EncoderLaTeX::encode( const TQString &text, const TQChar &replace )
    {
        TQString encoded( text );

        TQValueList<CharMappingItem>::ConstIterator rule = m_charMapping.begin();
        while ( rule != m_charMapping.end() )
        {
            const CharMappingItem &mapping = *rule;
            if ( mapping.unicode == replace )
                encoded.replace( mapping.unicode, mapping.latex );
            ++rule;
        }

        return encoded;
    }

    /**
     * Encode @p text like encode() does and then apply adjustments
     * specific to the given field type: page fields get en dashes
     * written as "--", URL fields additionally get the escapes of
     * '&', '_' and '#' removed again. Other field types are returned
     * as encode() produced them.
     *
     * @param text Unicode text
     * @param fieldType type of the field the text belongs to
     * @return LaTeX-encoded text
     */
    TQString EncoderLaTeX::encodeSpecialized( const TQString & text, const EntryField::FieldType fieldType )
    {
        TQString encoded = encode( text );
        const TQChar enDash( 0x2013 );

        if ( fieldType == EntryField::ftPages )
            encoded.replace( enDash, "--" );
        else if ( fieldType == EntryField::ftURL )
            encoded.replace( "\\&", "&" ).replace( "\\_", "_" ).replace( enDash, "--" ).replace( "\\#", "#" );

        return encoded;
    }

    /**
     * Rewrite every "character followed by combining mark" pair in
     * @p text as the LaTeX accent command applied to that character,
     * e.g. \v{g}. The text is modified in place.
     *
     * @param text text to modify
     * @return reference to @p text
     */
    TQString& EncoderLaTeX::decomposedUTF8toLaTeX( TQString &text )
    {
        TQValueList<CombinedMappingItem>::Iterator rule = m_combinedMapping.begin();
        for ( ; rule != m_combinedMapping.end(); ++rule )
        {
            TQRegExp &pattern = ( *rule ).regExp;
            const TQString commandOpen = TQString( "\\" ) + ( *rule ).latex + "{";

            for ( int at = pattern.search( text ); at >= 0; at = pattern.search( text, at + 1 ) )
            {
                /** the pair occupies two characters starting at 'at' */
                const TQString baseChar = pattern.cap( 1 );
                text = text.left( at ) + commandOpen + baseChar + "}" + text.mid( at + 2 );
            }
        }

        return text;
    }

    void EncoderLaTeX::buildCombinedMapping()
    {
        for ( int i = 0; i < decompositionscount; i++ )
        {
            CombinedMappingItem item;
            item.regExp = TQRegExp( "(.)" + TQString( TQChar( decompositions[i].unicode ) ) );
            item.latex = decompositions[i].latexCommand;
            m_combinedMapping.append( item );
        }
    }

    void EncoderLaTeX::buildCharMapping()
    {
        /** encoding and decoding for digraphs such as -- or ?` */
        for ( int i = 0; i < charmappingdatalatexcount; i++ )
        {
            CharMappingItem charMappingItem;
            charMappingItem.regExp = TQRegExp( charmappingdatalatex[ i ].regexp );
            charMappingItem.unicode = TQChar( charmappingdatalatex[ i ].unicode );
            charMappingItem.latex = TQString( charmappingdatalatex[ i ].latex );
            m_charMapping.append( charMappingItem );
        }

        /** encoding and decoding for commands such as \AA or \ss */
        for ( int i = 0; i < commandmappingdatalatexcount; ++i )
        {
            /** different types of writing such as {\AA} or \AA{} possible */
            for ( int j = 0; j < expansionscmdcount; ++j )
            {
                CharMappingItem charMappingItem;
                charMappingItem.regExp = TQRegExp( TQString( expansionsCmd[j] ).arg( commandmappingdatalatex[i].letters ) );
                charMappingItem.unicode = TQChar( commandmappingdatalatex[i].unicode );
                if ( charMappingItem.regExp.numCaptures() > 0 )
                    charMappingItem.unicode += TQString( "\\1" );
                charMappingItem.latex = TQString( "{\\%1}" ).arg( commandmappingdatalatex[i].letters );
                m_charMapping.append( charMappingItem );
            }
        }

        /** encoding and decoding for letters such as \"a */
        for ( int i = 0; i < modcharmappingdatalatexcount; ++i )
        {
            TQString modifierRegExp = TQString( modcharmappingdatalatex[i].modifier );
            TQString modifier = modifierRegExp;
            modifier.replace( "\\^", "^" ).replace( "\\\\", "\\" );

            /** first batch of replacement rules, where no separator is required between modifier and character (e.g. \"a) */
            if ( !modifierRegExp.at( modifierRegExp.length() - 1 ).isLetter() )
                for ( int j = 0; j < expansionsmod2count; ++j )
                {
                    CharMappingItem charMappingItem;
                    charMappingItem.regExp = TQRegExp( TQString( expansionsMod2[j] ).arg( modifierRegExp ).arg( modcharmappingdatalatex[i].letter ) );
                    charMappingItem.unicode = TQChar( modcharmappingdatalatex[i].unicode );
                    charMappingItem.latex = TQString( "{%1%2}" ).arg( modifier ).arg( modcharmappingdatalatex[i].letter );
                    m_charMapping.append( charMappingItem );
                }

            /** second batch of replacement rules, where a separator is required between modifier and character (e.g. \v{g}) */
            for ( int j = 0; j < expansionsmod1count; ++j )
            {
                CharMappingItem charMappingItem;
                charMappingItem.regExp = TQRegExp( TQString( expansionsMod1[j] ).arg( modifierRegExp ).arg( modcharmappingdatalatex[i].letter ) );
                charMappingItem.unicode = TQChar( modcharmappingdatalatex[i].unicode );
                charMappingItem.latex = TQString( "%1{%2}" ).arg( modifier ).arg( modcharmappingdatalatex[i].letter );
                m_charMapping.append( charMappingItem );
            }
        }
    }

    /**
     * Return the shared encoder instance, creating it on the first call.
     *
     * @return pointer to the shared instance
     */
    EncoderLaTeX* EncoderLaTeX::currentEncoderLaTeX()
    {
        if ( encoderLaTeX != NULL )
            return encoderLaTeX;

        encoderLaTeX = new EncoderLaTeX();
        return encoderLaTeX;
    }

    /**
     * Destroy the shared encoder instance, if there is one, and reset
     * the pointer so that the next request creates a new instance.
     */
    void EncoderLaTeX::deleteCurrentEncoderLaTeX()
    {
        if ( encoderLaTeX == NULL )
            return;

        delete encoderLaTeX;
        encoderLaTeX = NULL;
    }

    char EncoderLaTeX::unicodeToASCII( unsigned int unicode )
    {
        if ( unicode < 128 ) return ( char )unicode;
        for ( int i = 0; i < modcharmappingdatalatexcount; ++i )
            if ( modcharmappingdatalatex[i].unicode == unicode )
                return *modcharmappingdatalatex[i].letter;
        return '?';
    }

}