You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
pytqt/pylupdate3/fetchtr.cpp

456 lines
15 KiB

/**********************************************************************
** Copyright (C) 2002 Detlev Offenbach <detlev@die-offenbachs.de>
**
** This is a modified version of lupdate. The original is part of TQt-Linguist.
** The copyright of the original file can be found below.
**
** This version is modified to handle python sources.
**
** The file is provided AS IS with NO WARRANTY OF ANY KIND,
** INCLUDING THE WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR
** A PARTICULAR PURPOSE.
**
**********************************************************************/
/**********************************************************************
** Copyright (C) 2000 Trolltech AS. All rights reserved.
**
** fetchtr.cpp
**
** This file is part of TQt Linguist.
**
** See the file LICENSE included in the distribution for the usage
** and distribution terms.
**
** The file is provided AS IS with NO WARRANTY OF ANY KIND,
** INCLUDING THE WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR
** A PARTICULAR PURPOSE.
**
**********************************************************************/
#include <qfile.h>
#include <qregexp.h>
#include <qstring.h>
#include <qtextstream.h>
#include <ctype.h>
#include <errno.h>
#include <metatranslator.h>
#include <stdio.h>
#include <string.h>
/*#include <qxml.h>*/
// Prefix that identifies a "TRANSLATOR <context> <comment>" comment; parse()
// compares the start of a comment against it and strips it off.
static const char MagicComment[] = "TRANSLATOR ";
// NOTE(review): the two maps below are not referenced anywhere in the part of
// this file visible here; presumably leftovers from the C++ lupdate this file
// was derived from -- confirm before removing.
static TQMap<TQCString, int> needs_Q_OBJECT;
static TQMap<TQCString, int> lacks_Q_OBJECT;
/*
The first part of this source file is the python tokenizer. We skip
most of python; the only tokens that interest us are defined here.
*/
/*
Token kinds produced by getToken():
  Tok_Eof          end of input (also returned for a string cut off by EOF)
  Tok_class        the keyword "class"
  Tok_return       the keyword "return"
  Tok_tr           identifiers such as tr, __tr and QT_TR_NOOP
  Tok_trUtf8       identifiers such as trUtf8 and __trUtf8
  Tok_translate    identifiers such as translate and QT_TRANSLATE_NOOP
  Tok_Ident        any other identifier; its text is in yyIdent
  Tok_Comment      handled by parse(), but getToken() as written in this
                   file never returns it
  Tok_Dot          '.'
  Tok_String       a string literal; its decoded text is in yyString
  Tok_LeftParen    '('
  Tok_RightParen   ')'
  Tok_Comma        ','
*/
enum { Tok_Eof, Tok_class, Tok_return, Tok_tr,
Tok_trUtf8, Tok_translate, Tok_Ident,
Tok_Comment, Tok_Dot, Tok_String,
Tok_LeftParen, Tok_RightParen,
Tok_Comma};
/*
The tokenizer maintains the following global variables. The names
should be self-explanatory.
*/
// name of the file being tokenized; only used in warning messages
static TQCString yyFileName;
// the current (not yet consumed) input character, or EOF
static int yyCh;
// text and length of the most recent identifier; longer identifiers are
// truncated to sizeof(yyIdent) - 1 characters
static char yyIdent[128];
static size_t yyIdentLen;
// comment text read by parse() for Tok_Comment; yyCommentLen is reset by
// getToken(), but nothing in the visible code writes to yyComment
static char yyComment[65536];
static size_t yyCommentLen;
// decoded text and length of the most recent string literal; longer strings
// are truncated to sizeof(yyString) - 1 characters
static char yyString[16384];
static size_t yyStringLen;
// incremented for every '(' and decremented for every ')'; parse() warns if
// it is non-zero at the end of the file
static int yyParenDepth;
// line on which the scan of the current token started (used in warnings)
static int yyLineNo;
// line of the character most recently returned by getChar()
static int yyCurLineNo;
// the file to read from (if reading from a file)
static FILE *yyInFile;
// the string to read from and current position in the string (otherwise)
// NOTE(review): only yyInPos is touched in the visible code (reset to 0 by
// startTokenizer()); no string-based reader is visible here.
static TQString yyInStr;
static int yyInPos;
// one-character pushback slot shared by getCharFromFile() and
// peekCharFromFile(); a negative value means "empty"
static int buf;
// character source installed by startTokenizer()
static int (*getChar)();
static int (*peekChar)();
/*
Returns the next input character from yyInFile (or EOF). A character
previously parked in `buf` by peekCharFromFile() is handed out first.
Every newline that is returned advances yyCurLineNo.
*/
static int getCharFromFile()
{
    int ch = buf;

    if ( ch >= 0 ) {
        // consume the pushed-back character
        buf = -1;
    } else {
        ch = getc( yyInFile );
    }

    if ( ch == '\n' )
        ++yyCurLineNo;
    return ch;
}
/*
Returns the character that the next call to getCharFromFile() will
return, without consuming it. The character is parked in `buf`.

Peeking is idempotent: if a character is already parked it is returned
again instead of reading (and thereby losing) another one from the file.
*/
static int peekCharFromFile()
{
    if ( buf < 0 )
        buf = getc( yyInFile );
    return buf;
}
/*
Resets the tokenizer state and primes it with the first input character.

fileName      name reported in warnings
getCharFunc   function that consumes and returns the next character
peekCharFunc  function that returns the next character without consuming it
*/
static void startTokenizer( const char *fileName, int (*getCharFunc)(),
                            int (*peekCharFunc)() )
{
    yyInPos = 0;
    buf = -1;
    getChar = getCharFunc;
    peekChar = peekCharFunc;
    yyFileName = fileName;
    yyParenDepth = 0;
    // The line counter must be initialised before the first character is
    // read: the reader increments it on '\n', and resetting it afterwards
    // would lose a newline at the very start of the file.
    yyCurLineNo = 1;
    yyCh = getChar();
}
/*
Scans the input and returns the next token of interest (one of the Tok_*
values). Everything else -- whitespace, operators, numbers, comments --
is skipped.

Side effects:
  - yyIdent/yyIdentLen hold the text of an identifier token (also for the
    keyword-like tokens Tok_class, Tok_tr, ...);
  - yyString/yyStringLen hold the decoded text of a Tok_String;
  - yyLineNo is set to the line on which the scan of the token started;
  - yyParenDepth is adjusted for '(' and ')'.

Returns Tok_Eof at end of input, including when a string literal is cut
off by the end of the file.
*/
static int getToken()
{
    // escape letters and the characters they stand for, index-aligned
    const char tab[] = "abfnrtv";
    const char backTab[] = "\a\b\f\n\r\t\v";
    uint n;

    yyIdentLen = 0;
    yyCommentLen = 0;
    yyStringLen = 0;

    while ( yyCh != EOF ) {
        yyLineNo = yyCurLineNo;

        if ( isalpha(yyCh) || yyCh == '_' ) {
            // identifier or keyword; over-long names are truncated
            do {
                if ( yyIdentLen < sizeof(yyIdent) - 1 )
                    yyIdent[yyIdentLen++] = (char) yyCh;
                yyCh = getChar();
            } while ( isalnum(yyCh) || yyCh == '_' );
            yyIdent[yyIdentLen] = '\0';

            switch ( yyIdent[0] ) {
            case 'Q':
                if ( strcmp(yyIdent + 1, "T_TR_NOOP") == 0 ) {
                    return Tok_tr;
                } else if ( strcmp(yyIdent + 1, "T_TRANSLATE_NOOP") == 0 ) {
                    return Tok_translate;
                }
                break;
            case 'c':
                if ( strcmp(yyIdent + 1, "lass") == 0 )
                    return Tok_class;
                break;
            case 'r':
                if ( strcmp(yyIdent + 1, "eturn") == 0 )
                    return Tok_return;
                break;
            case 't':
                if ( strcmp(yyIdent + 1, "r") == 0 )
                    return Tok_tr;
                else if ( strcmp(yyIdent + 1, "rUtf8") == 0 )
                    return Tok_trUtf8;
                else if ( strcmp(yyIdent + 1, "ranslate") == 0 )
                    return Tok_translate;
                // Must not fall through into the '_' case: otherwise
                // "t_tr" and "t_trUtf8" would be taken for tr calls.
                break;
            case '_':
                if ( strcmp(yyIdent + 1, "_tr") == 0 )
                    return Tok_tr;
                else if ( strcmp(yyIdent + 1, "_trUtf8") == 0 )
                    return Tok_trUtf8;
                break;
            }
            return Tok_Ident;
        } else {
            switch ( yyCh ) {
            case '#':
                // Skip the comment up to, but not including, the newline.
                // The character right after '#' has to be tested as well,
                // or an empty comment would swallow the following line.
                do {
                    yyCh = getChar();
                } while ( yyCh != EOF && yyCh != '\n' );
                break;
            case '"':
            case '\'': {
                int quoteChar = yyCh;   // the delimiter that opened the string
                int trippelQuote = 0;   // inside a triple-quoted string
                int singleQuote = 1;    // string is (so far) single-quoted
                int in = 0;             // at least one content char was seen

                yyCh = getChar();

                while ( yyCh != EOF ) {
                    // a single-quoted string ends at its closing quote and
                    // cannot span lines
                    if ( singleQuote &&
                         (yyCh == '\n' || (in && yyCh == quoteChar)) )
                        break;

                    if ( yyCh == quoteChar ) {
                        if ( peekChar() == quoteChar ) {
                            yyCh = getChar();
                            if ( !trippelQuote ) {
                                // second and third quote of an opening
                                // triple quote
                                trippelQuote = 1;
                                singleQuote = 0;
                                in = 1;
                                yyCh = getChar();
                            } else {
                                yyCh = getChar();
                                if ( yyCh == quoteChar ) {
                                    // closing triple quote
                                    trippelQuote = 0;
                                    break;
                                }
                                // Two quotes that are not followed by a
                                // third one are ordinary string content.
                                for ( int i = 0; i < 2; i++ ) {
                                    if ( yyStringLen < sizeof(yyString) - 1 )
                                        yyString[yyStringLen++] =
                                            (char) quoteChar;
                                }
                            }
                        } else if ( trippelQuote ) {
                            // lone quote inside a triple-quoted string
                            if ( yyStringLen < sizeof(yyString) - 1 )
                                yyString[yyStringLen++] = (char) yyCh;
                            yyCh = getChar();
                            continue;
                        } else {
                            // empty single-quoted string
                            break;
                        }
                    } else {
                        in = 1;
                    }

                    if ( yyCh == '\\' ) {
                        yyCh = getChar();

                        if ( yyCh == 'x' ) {
                            // \xHH... ; the leading "0" keeps sscanf happy
                            // when no hex digit follows
                            TQCString hex = "0";

                            yyCh = getChar();
                            while ( isxdigit(yyCh) ) {
                                hex += (char) yyCh;
                                yyCh = getChar();
                            }
                            sscanf( hex, "%x", &n );
                            if ( yyStringLen < sizeof(yyString) - 1 )
                                yyString[yyStringLen++] = (char) n;
                        } else if ( yyCh >= '0' && yyCh < '8' ) {
                            // octal escape
                            TQCString oct = "";

                            do {
                                oct += (char) yyCh;
                                yyCh = getChar();
                            } while ( yyCh >= '0' && yyCh < '8' );
                            sscanf( oct, "%o", &n );
                            if ( yyStringLen < sizeof(yyString) - 1 )
                                yyString[yyStringLen++] = (char) n;
                        } else {
                            // single-letter escape; unknown escapes stand
                            // for the escaped character itself
                            const char *p = strchr( tab, yyCh );
                            if ( yyStringLen < sizeof(yyString) - 1 )
                                yyString[yyStringLen++] = ( p == 0 ) ?
                                    (char) yyCh : backTab[p - tab];
                            yyCh = getChar();
                        }
                    } else {
                        if ( yyStringLen < sizeof(yyString) - 1 )
                            yyString[yyStringLen++] = (char) yyCh;
                        yyCh = getChar();
                    }
                }
                yyString[yyStringLen] = '\0';

                if ( yyCh != quoteChar ) {
                    // NOTE(review): this printf looks like leftover debug
                    // output; kept so that the tool's output is unchanged.
                    printf("%c\n", yyCh);
                    qWarning( "%s:%d: Unterminated string",
                              (const char *) yyFileName, yyLineNo );
                }

                if ( yyCh == EOF ) {
                    return Tok_Eof;
                } else {
                    yyCh = getChar();
                    return Tok_String;
                }
            }
            case '(':
                yyParenDepth++;
                yyCh = getChar();
                return Tok_LeftParen;
            case ')':
                yyParenDepth--;
                yyCh = getChar();
                return Tok_RightParen;
            case ',':
                yyCh = getChar();
                return Tok_Comma;
            case '.':
                yyCh = getChar();
                return Tok_Dot;
            default:
                // anything else is of no interest
                yyCh = getChar();
            }
        }
    }
    return Tok_Eof;
}
/*
The second part of this source file is the parser. It accomplishes
a very easy task: It finds all strings inside a tr() or translate()
call, and possibly finds out the context of the call. It supports
three cases:
(1) the context is specified, as in FunnyDialog.tr("Hello") or
translate("FunnyDialog", "Hello");
(2) the call is made on self, as in self.tr("Hello"); the context is
then the name of the most recently seen class statement;
(3) the call is unqualified, as in tr("Hello"); the context is then the
default context passed to fetchtr_py().
*/
// The parser's current token: the most recent value returned by getToken().
static int yyTok;
/*
Consumes the current token if it is of kind t. Returns whether it was;
on a mismatch the current token is left in place.
*/
static bool match( int t )
{
    if ( yyTok != t )
        return false;

    yyTok = getToken();
    return true;
}
/*
Consumes a run of adjacent string literals and stores their
concatenation in *s. Returns whether at least one string was present;
if not, *s is set to the empty string and no token is consumed.
*/
static bool matchString( TQCString *s )
{
    *s = "";
    if ( yyTok != Tok_String )
        return false;

    do {
        *s += yyString;
        yyTok = getToken();
    } while ( yyTok == Tok_String );
    return true;
}
static bool matchEncoding( bool *utf8 )
{
if ( yyTok == Tok_Ident ) {
if ( strcmp(yyIdent, "TQApplication") == 0 ) {
yyTok = getToken();
}
*utf8 = TQString( yyIdent ).endsWith( TQString("UTF8") );
yyTok = getToken();
return TRUE;
} else {
return FALSE;
}
}
/*
Walks the token stream and inserts a MetaTranslatorMessage into tor for
every tr()/trUtf8()/translate() call whose arguments are string literals.

tor             receives the extracted messages
initialContext  context used for self.tr() before any class statement
                has been seen (may be 0)
defaultContext  context used for unqualified tr() calls

The context of a tr() call is taken from the dotted name in front of it:
no qualifier selects defaultContext, "self" selects the name of the most
recently seen class, anything else is used verbatim.

Warns if parentheses are unbalanced at the end of the input.
*/
static void parse( MetaTranslator *tor, const char *initialContext,
                   const char *defaultContext )
{
    // NOTE(review): nothing in this function ever adds to this map, so the
    // lookup below never hits.
    TQMap<TQCString, TQCString> qualifiedContexts;
    TQCString context;
    TQCString text;
    TQCString com;
    TQCString functionContext = initialContext;
    TQCString prefix;           // dotted qualifier collected so far
    bool utf8 = FALSE;

    yyTok = getToken();
    while ( yyTok != Tok_Eof ) {
        switch ( yyTok ) {
        case Tok_class:
            // the identifier after "class" becomes the context of self.tr()
            yyTok = getToken();
            functionContext = yyIdent;
            yyTok = getToken();
            break;
        case Tok_tr:
        case Tok_trUtf8:
            utf8 = ( yyTok == Tok_trUtf8 );
            yyTok = getToken();
            if ( match(Tok_LeftParen) && matchString(&text) ) {
                com = "";
                if ( match(Tok_RightParen) || (match(Tok_Comma) &&
                        matchString(&com) && match(Tok_RightParen)) ) {
                    if ( prefix.isNull() ) {
                        context = defaultContext;
                    } else if ( qstrcmp(prefix, "self") == 0 ) {
                        context = functionContext;
                    } else {
                        context = prefix;
                    }

                    if ( qualifiedContexts.contains(context) )
                        context = qualifiedContexts[context];
                    tor->insert( MetaTranslatorMessage(context, text, com,
                                                       TQString::null, utf8) );
                }
            }
            // The qualifier belonged to this call. Drop it even if the
            // arguments did not match, so that it cannot leak into the
            // context of a later call.
            prefix = (const char *) 0;
            break;
        case Tok_translate:
            utf8 = FALSE;
            // A qualifier such as "TQApplication." in front of translate()
            // must not be glued onto the next identifier; otherwise a
            // following self.tr() would get "TQApplication.self" as context.
            prefix = (const char *) 0;
            yyTok = getToken();
            if ( match(Tok_LeftParen) &&
                 matchString(&context) &&
                 match(Tok_Comma) &&
                 matchString(&text) ) {
                com = "";
                // translate(context, text[, comment[, encoding]])
                if ( match(Tok_RightParen) ||
                     (match(Tok_Comma) &&
                      matchString(&com) &&
                      (match(Tok_RightParen) ||
                       (match(Tok_Comma) &&
                        matchEncoding(&utf8) &&
                        match(Tok_RightParen)))) )
                    tor->insert( MetaTranslatorMessage(context, text, com,
                                                       TQString::null, utf8) );
            }
            break;
        case Tok_Ident:
            // extend the dotted qualifier; it only survives if a '.' follows
            if ( !prefix.isNull() )
                prefix += ".";
            prefix += yyIdent;
            yyTok = getToken();
            if ( yyTok != Tok_Dot )
                prefix = (const char *) 0;
            break;
        case Tok_Comment:
            // "TRANSLATOR <context> <comment>" magic comment
            com = yyComment;
            com = com.simplifyWhiteSpace();
            if ( com.left(sizeof(MagicComment) - 1) == MagicComment ) {
                com.remove( 0, sizeof(MagicComment) - 1 );
                int k = com.find( ' ' );
                if ( k == -1 ) {
                    context = com;
                } else {
                    context = com.left( k );
                    com.remove( 0, k + 1 );
                    tor->insert( MetaTranslatorMessage(context, "", com,
                                                       TQString::null, FALSE) );
                }
            }
            yyTok = getToken();
            break;
        default:
            yyTok = getToken();
        }
    }

    if ( yyParenDepth != 0 )
        qWarning( "%s: Unbalanced parentheses in Python code",
                  (const char *) yyFileName );
}
/*
Extracts the translatable strings of one Python source file.

fileName        path of the Python source to scan
tor             receives the extracted messages
defaultContext  context used for unqualified tr() calls
mustExist       if true, a warning is issued when the file cannot be
                opened; otherwise the failure is silent
*/
void fetchtr_py( const char *fileName, MetaTranslator *tor,
                 const char *defaultContext, bool mustExist )
{
    yyInFile = fopen( fileName, "r" );

    if ( yyInFile != 0 ) {
        startTokenizer( fileName, getCharFromFile, peekCharFromFile );
        parse( tor, 0, defaultContext );
        fclose( yyInFile );
        return;
    }

    // could not open the file
    if ( mustExist )
        qWarning( "pylupdate error: cannot open Python source file '%s': %s",
                  fileName, strerror(errno) );
}