pytqt/pylupdate3/fetchtr.cpp

/**********************************************************************
** Copyright (C) 2002 Detlev Offenbach <detlev@die-offenbachs.de>
**
** This is a modified version of lupdate. The original is part of TQt-Linguist.
** The copyright of the original file can be found below.
**
** This version is modified to handle python sources.
**
**   The file is provided AS IS with NO WARRANTY OF ANY KIND,
**   INCLUDING THE WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR
**   A PARTICULAR PURPOSE.
**
**********************************************************************/


/**********************************************************************
**   Copyright (C) 2000 Trolltech AS.  All rights reserved.
**
**   fetchtr.cpp
**
**   This file is part of TQt Linguist.
**
**   See the file LICENSE included in the distribution for the usage
**   and distribution terms.
**
**   The file is provided AS IS with NO WARRANTY OF ANY KIND,
**   INCLUDING THE WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR
**   A PARTICULAR PURPOSE.
**
**********************************************************************/

#include <tqfile.h>
#include <tqregexp.h>
#include <tqstring.h>
#include <tqtextstream.h>

#include <ctype.h>
#include <errno.h>
#include <metatranslator.h>
#include <stdio.h>
#include <string.h>
/*#include <tqxml.h>*/


// Prefix that parse() looks for at the start of a (whitespace-simplified)
// comment; the word following it is taken as a translation context.
static const char MagicComment[] = "TRANSLATOR ";

// NOTE(review): neither map is referenced anywhere else in the code visible
// here; presumably left over from the C++ version of lupdate -- confirm
// before removing.
static TQMap<TQCString, int> needs_Q_OBJECT;
static TQMap<TQCString, int> lacks_Q_OBJECT;

/*
  The first part of this source file is the python tokenizer.  We skip
  most of python; the only tokens that interest us are defined here.
*/

// Token codes returned by getToken().
//   Tok_Eof                      end of input
//   Tok_class, Tok_return        the identifiers "class" and "return"
//   Tok_tr, Tok_trUtf8           tr / __tr / QT_TR_NOOP and trUtf8 / __trUtf8
//   Tok_translate                translate / QT_TRANSLATE_NOOP
//   Tok_Ident                    any other identifier (text in yyIdent)
//   Tok_String                   a string literal (text in yyString)
//   Tok_Dot, Tok_Comma,
//   Tok_LeftParen, Tok_RightParen  the corresponding punctuation
// NOTE(review): Tok_Comment is handled by parse(), but the getToken() shown
// in this file never returns it.
enum { Tok_Eof, Tok_class, Tok_return, Tok_tr,
       Tok_trUtf8, Tok_translate, Tok_Ident,
       Tok_Comment, Tok_Dot, Tok_String,
       Tok_LeftParen, Tok_RightParen,
       Tok_Comma};

/*
  The tokenizer maintains the following global variables. The names
  should be self-explanatory.
*/
// name of the file being tokenized; used in warning messages
static TQCString yyFileName;
// current (not yet consumed) input character, or EOF
static int yyCh;
// text and length of the identifier most recently scanned by getToken()
static char yyIdent[128];
static size_t yyIdentLen;
// comment buffer; in the code visible here only yyCommentLen is reset and
// nothing is ever written to yyComment
static char yyComment[65536];
static size_t yyCommentLen;
// decoded text and length of the string literal most recently scanned
static char yyString[16384];
static size_t yyStringLen;
// '(' count minus ')' count seen so far
static int yyParenDepth;
// value of yyCurLineNo at the time getToken() started scanning the token
static int yyLineNo;
// line counter, incremented for every '\n' read by getCharFromFile()
static int yyCurLineNo;

// the file to read from (if reading from a file)
static FILE *yyInFile;

// the string to read from and current position in the string (otherwise)
// NOTE(review): no string-based reader is present in the code visible here.
static TQString yyInStr;
static int yyInPos;
// one-character lookahead slot filled by peekCharFromFile(); negative when
// empty
static int buf;

// character source callbacks installed by startTokenizer()
static int (*getChar)();
static int (*peekChar)();

/*
  Returns the next input character (or EOF) and advances the input.
  A character previously stashed in the lookahead slot `buf` is handed
  out first; otherwise the character is read from yyInFile. Every
  newline returned bumps yyCurLineNo.
*/
static int getCharFromFile()
{
    int ch = buf;

    if ( ch >= 0 )
        buf = -1;               // lookahead consumed
    else
        ch = getc( yyInFile );

    if ( ch == '\n' )
        yyCurLineNo++;
    return ch;
}

/*
  Returns the character that the next getCharFromFile() call will
  return, without consuming it (EOF at end of input).

  The character is kept in the one-slot lookahead `buf`. If the slot is
  already occupied it is returned as is, so that calling this function
  several times in a row does not read (and lose) further characters
  from the file.
*/
static int peekCharFromFile()
{
    if ( buf < 0 )
        buf = getc( yyInFile );
    return buf;
}

/*
  Resets the tokenizer state and primes it with the first input
  character.

  fileName      name used in warning messages
  getCharFunc   callback returning the next character and consuming it
  peekCharFunc  callback returning the next character without consuming it

  The line counter and parenthesis depth are initialised before the
  first character is read: the read callback increments yyCurLineNo when
  it returns '\n', and that increment must not be overwritten when the
  input starts with a newline.
*/
static void startTokenizer( const char *fileName, int (*getCharFunc)(),
                            int (*peekCharFunc)() )
{
    yyInPos = 0;
    buf = -1;
    getChar = getCharFunc;
    peekChar = peekCharFunc;

    yyFileName = fileName;
    yyParenDepth = 0;
    yyCurLineNo = 1;
    yyCh = getChar();
}

static int getToken()
{
    const char tab[] = "abfnrtv";
    const char backTab[] = "\a\b\f\n\r\t\v";
    uint n;

    yyIdentLen = 0;
    yyCommentLen = 0;
    yyStringLen = 0;

    while ( yyCh != EOF ) {
        yyLineNo = yyCurLineNo;

        if ( isalpha(yyCh) || yyCh == '_' ) {
            do {
                if ( yyIdentLen < sizeof(yyIdent) - 1 )
                    yyIdent[yyIdentLen++] = (char) yyCh;
                yyCh = getChar();
            } while ( isalnum(yyCh) || yyCh == '_' );
            yyIdent[yyIdentLen] = '\0';

            switch ( yyIdent[0] ) {
                case 'Q':
                    if ( strcmp(yyIdent + 1, "T_TR_NOOP") == 0 ) {
                        return Tok_tr;
                    } else if ( strcmp(yyIdent + 1, "T_TRANSLATE_NOOP") == 0 ) {
                        return Tok_translate;
                    }
                    break;
                case 'c':
                    if ( strcmp(yyIdent + 1, "lass") == 0 )
                        return Tok_class;
                    break;
                case 'r':
                    if ( strcmp(yyIdent + 1, "eturn") == 0 )
                        return Tok_return;
                    break;
                case 't':
                    if ( strcmp(yyIdent + 1, "r") == 0 )
                        return Tok_tr;
                    else if ( strcmp(yyIdent + 1, "rUtf8") == 0 )
                        return Tok_trUtf8;
                    else if ( strcmp(yyIdent + 1, "ranslate") == 0 )
                        return Tok_translate;
                case '_':
                    if ( strcmp(yyIdent + 1, "_tr") == 0 )
                        return Tok_tr;
                    else if ( strcmp(yyIdent + 1, "_trUtf8") == 0 )
                        return Tok_trUtf8;
            }
            return Tok_Ident;
        } else {
            switch ( yyCh ) {
                case '#':
                    yyCh = getChar();
                    do {
                        yyCh = getChar();
                    } while ( yyCh != EOF && yyCh != '\n' );
                    break;
                case '"':
                case '\'':
                    int quoteChar;
                    int trippelQuote, singleQuote;
                    int in;

                    quoteChar = yyCh;
                    trippelQuote = 0;
                    singleQuote = 1;
                    in = 0;
                    yyCh = getChar();

                    while ( yyCh != EOF ) {
                        if ( singleQuote && (yyCh == '\n' || (in && yyCh == quoteChar)) )
                            break;

                        if ( yyCh == quoteChar ) {
                            if (peekChar() == quoteChar) {
                                yyCh = getChar();
                                if (!trippelQuote) {
                                    trippelQuote = 1;
                                    singleQuote = 0;
                                    in = 1;
                                    yyCh = getChar();
                                } else {
                                    yyCh = getChar();
                                    if (yyCh == quoteChar) {
                                        trippelQuote = 0;
                                        break;
                                    }
                                }
                            } else if (trippelQuote) {
                                if ( yyStringLen < sizeof(yyString) - 1 )
                                    yyString[yyStringLen++] = (char) yyCh;
                                yyCh = getChar();
                                continue;
                            } else
                                break;
                        } else
                            in = 1;

                        if ( yyCh == '\\' ) {
                            yyCh = getChar();

                            if ( yyCh == 'x' ) {
                                TQCString hex = "0";

                                yyCh = getChar();
                                while ( isxdigit(yyCh) ) {
                                    hex += (char) yyCh;
                                    yyCh = getChar();
                                }
                                sscanf( hex, "%x", &n );
                                if ( yyStringLen < sizeof(yyString) - 1 )
                                    yyString[yyStringLen++] = (char) n;
                            } else if ( yyCh >= '0' && yyCh < '8' ) {
                                TQCString oct = "";

                                do {
                                    oct += (char) yyCh;
                                    yyCh = getChar();
                                } while ( yyCh >= '0' && yyCh < '8' );
                                sscanf( oct, "%o", &n );
                                if ( yyStringLen < sizeof(yyString) - 1 )
                                    yyString[yyStringLen++] = (char) n;
                            } else {
                                const char *p = strchr( tab, yyCh );
                                if ( yyStringLen < sizeof(yyString) - 1 )
                                    yyString[yyStringLen++] = ( p == 0 ) ?
                                        (char) yyCh : backTab[p - tab];
                                yyCh = getChar();
                            }
                        } else {
                            if ( yyStringLen < sizeof(yyString) - 1 )
                                yyString[yyStringLen++] = (char) yyCh;
                            yyCh = getChar();
                        }
                    }
                    yyString[yyStringLen] = '\0';

                    if ( yyCh != quoteChar ) {
                        printf("%c\n", yyCh);
                        tqWarning( "%s:%d: Unterminated string",
                          (const char *) yyFileName, yyLineNo );
                    }

                    if ( yyCh == EOF ) {
                        return Tok_Eof;
                    } else {
                        yyCh = getChar();
                        return Tok_String;
                    }
                    break;
                case '(':
                    yyParenDepth++;
                    yyCh = getChar();
                    return Tok_LeftParen;
                case ')':
                    yyParenDepth--;
                    yyCh = getChar();
                    return Tok_RightParen;
                case ',':
                    yyCh = getChar();
                    return Tok_Comma;
                case '.':
                    yyCh = getChar();
                    return Tok_Dot;
                default:
                    yyCh = getChar();
            }
        }
    }
    return Tok_Eof;
}

/*
  The second part of this source file is the parser. It accomplishes
  a very easy task: It finds all strings inside a tr() or translate()
  call, and possibly finds out the context of the call. It supports
  three cases:
  (1) the context is specified, as in FunnyDialog.tr("Hello") or
     translate("FunnyDialog", "Hello");
  (2) the call is made on self, as in self.tr("Hello"); the context is
     then the name of the most recently seen class;
  (3) the call has no prefix, as in tr("Hello"); the default context
     passed to the parser is used.
*/

// the parser's current (look-ahead) token, as returned by getToken()
static int yyTok;

static bool match( int t )
{
    bool matches = ( yyTok == t );
    if ( matches )
        yyTok = getToken();
    return matches;
}

/*
  Consumes a run of adjacent string literals and stores their
  concatenation in *s.

  *s is always reset to the empty string first. Returns TRUE if at
  least one string token was consumed, FALSE (with the current token
  untouched) otherwise.
*/
static bool matchString( TQCString *s )
{
    *s = "";
    if ( yyTok != Tok_String )
        return FALSE;

    do {
        *s += yyString;
        yyTok = getToken();
    } while ( yyTok == Tok_String );
    return TRUE;
}

static bool matchEncoding( bool *utf8 )
{
    if ( yyTok == Tok_Ident ) {
        if ( strcmp(yyIdent, "TQApplication") == 0 ) {
            yyTok = getToken();
        }
        *utf8 = TQString( yyIdent ).endsWith( TQString("UTF8") );
        yyTok = getToken();
        return TRUE;
    } else {
        return FALSE;
    }
}

/*
  Parses the token stream delivered by getToken() and inserts a message
  into tor for every tr()/trUtf8()/translate() call whose arguments are
  string literals.

  tor             translator receiving the messages
  initialContext  context used for self.tr() calls seen before any
                  class statement
  defaultContext  context used for tr() calls without a prefix

  For tr() and trUtf8() the context is derived from the dotted name in
  front of the call: no prefix selects defaultContext, the prefix "self"
  selects the name following the most recent class keyword, and any
  other prefix is used as the context itself.

  A warning is issued at the end if the parentheses seen in the file do
  not balance.
*/
static void parse( MetaTranslator *tor, const char *initialContext,
           const char *defaultContext )
{
    // NOTE(review): nothing in this function adds entries to
    // qualifiedContexts, so the lookup below never hits.
    TQMap<TQCString, TQCString> qualifiedContexts;
    TQCString context;
    TQCString text;
    TQCString com;
    TQCString functionContext = initialContext;
    TQCString prefix;
    bool utf8 = FALSE;

    yyTok = getToken();
    while ( yyTok != Tok_Eof ) {
        switch ( yyTok ) {
            case Tok_class:
                // the identifier after "class" becomes the context of
                // subsequent self.tr() calls
                yyTok = getToken();
                functionContext = yyIdent;
                yyTok = getToken();
                break;
            case Tok_tr:
            case Tok_trUtf8:
                utf8 = ( yyTok == Tok_trUtf8 );
                yyTok = getToken();
                // tr( "text" ) or tr( "text", "comment" )
                if ( match(Tok_LeftParen) && matchString(&text) ) {
                    com = "";
                    if ( match(Tok_RightParen) || (match(Tok_Comma) &&
                        matchString(&com) && match(Tok_RightParen)) ) {
                        if ( prefix.isNull() ) {
                            context = defaultContext;
                        } else if ( qstrcmp(prefix, "self") == 0 ) {
                            context = functionContext;
                        } else {
                            context = prefix;
                        }

                        if ( qualifiedContexts.contains(context) )
                            context = qualifiedContexts[context];
                        tor->insert( MetaTranslatorMessage(context, text, com,
                               TQString::null, utf8) );
                    }
                }
                // The prefix belongs to this call only. Drop it even when
                // the arguments did not match, so that it cannot leak into
                // the context of a later, unprefixed call.
                prefix = (const char *) 0;
                break;
            case Tok_translate:
                utf8 = FALSE;
                yyTok = getToken();
                // translate( "context", "text" [, "comment" [, encoding]] )
                if ( match(Tok_LeftParen) &&
                 matchString(&context) &&
                 match(Tok_Comma) &&
                 matchString(&text) ) {
                    com = "";
                    if ( match(Tok_RightParen) ||
                     (match(Tok_Comma) &&
                      matchString(&com) &&
                      (match(Tok_RightParen) ||
                       (match(Tok_Comma) &&
                        matchEncoding(&utf8) &&
                        match(Tok_RightParen)))) )
                        tor->insert( MetaTranslatorMessage(context, text, com,
                               TQString::null, utf8) );
                }
                // a qualifier such as "TQApplication" in front of
                // translate() must not be kept for later calls either
                prefix = (const char *) 0;
                break;
            case Tok_Ident:
                // accumulate dotted names; keep the prefix only while the
                // name is still being continued with a '.'
                if ( !prefix.isNull() )
                    prefix += ".";
                prefix += yyIdent;
                yyTok = getToken();
                if ( yyTok != Tok_Dot )
                    prefix = (const char *) 0;
                break;
            case Tok_Comment:
                // NOTE(review): the getToken() in this file never returns
                // Tok_Comment, so this branch looks unreachable -- confirm.
                com = yyComment;
                com = com.simplifyWhiteSpace();
                if ( com.left(sizeof(MagicComment) - 1) == MagicComment ) {
                    com.remove( 0, sizeof(MagicComment) - 1 );
                    int k = com.find( ' ' );
                    if ( k == -1 ) {
                        context = com;
                    } else {
                        context = com.left( k );
                        com.remove( 0, k + 1 );
                        tor->insert( MetaTranslatorMessage(context, "", com,
                               TQString::null, FALSE) );
                    }
                }
                yyTok = getToken();
                break;
            default:
                yyTok = getToken();
        }
    }

    if ( yyParenDepth != 0 )
        tqWarning( "%s: Unbalanced parentheses in Python code",
            (const char *) yyFileName );
}

/*
  Extracts the translatable strings of one Python source file.

  fileName        path of the Python source file
  tor             translator receiving the messages found
  defaultContext  context for tr() calls that carry no prefix
  mustExist       if true, a warning is issued when the file cannot be
                  opened; if false, that case is silently ignored
*/
void fetchtr_py( const char *fileName, MetaTranslator *tor,
          const char *defaultContext, bool mustExist )
{
    yyInFile = fopen( fileName, "r" );

    if ( yyInFile != 0 ) {
        startTokenizer( fileName, getCharFromFile, peekCharFromFile );
        parse( tor, 0, defaultContext );
        fclose( yyInFile );
        return;
    }

    // could not open the file
    if ( mustExist )
        tqWarning( "pylupdate error: cannot open Python source file '%s': %s",
          fileName, strerror(errno) );
}