/*
Copyright ( C ) 2003 by Walter Schreppers
Copyright ( C ) 2004 by Cies Breijs
This program is free software ; you can redistribute it and / or
modify it under the terms of version 2 of the GNU General Public
License as published by the Free Software Foundation .
This program is distributed in the hope that it will be useful ,
but WITHOUT ANY WARRANTY ; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE . See the
GNU General Public License for more details .
You should have received a copy of the GNU General Public License
along with this program ; if not , write to the Free Software
Foundation , Inc . , 51 Franklin Street , Fifth Floor , Boston , MA 02110 - 1301 , USA .
*/
# include <tqdom.h>
# include <tqfile.h>
# include <kdebug.h>
# include <tdelocale.h>
# include "settings.h"
# include "lexer.h"
/**
 * Creates a lexer that pulls its characters from @p iStream.
 *
 * Only the address of the stream is kept, so the stream has to stay alive
 * for as long as the lexer is used. Position tracking starts at row 1,
 * column 1, and a Translate object is allocated for the keyword lookups.
 *
 * NOTE(review): no matching delete for the Translate object is visible in
 * this part of the file -- confirm that it is released elsewhere.
 */
Lexer::Lexer(TQTextIStream& iStream)
	: inputStream(&iStream),
	  row(1),
	  col(1),
	  prevCol(1),
	  translate(new Translate())
{
}
/**
 * Reads the next token from the input stream.
 *
 * Leading blanks are skipped first. The token's start position is taken
 * before its first character is consumed and its end position after the
 * last one. The exception is the end-of-input path, which returns
 * immediately and leaves the end position untouched.
 *
 * A '#' starts a comment: everything up to the line break (or the end of
 * the input) is consumed and the character the scan stopped on is then
 * classified like any other character.
 *
 * @return the token that was recognised. Words come back with the type
 *         chosen by setTokenType(), numbers as tokNumber, strings as
 *         tokString, and any character that matches nothing as tokUnknown
 *         with the character itself as its look.
 */
Token Lexer::lex()
{
	skipSpaces(); // drop the blanks (most likely indentation) in front of the token

	Token currentToken;
	currentToken.type = tokNotSet; // placeholder, every path below assigns a real type
	currentToken.look = " ";
	currentToken.value = 0;
	currentToken.start.row = row;
	currentToken.start.col = col;

	TQChar currentChar = getChar();

	if (inputStream->atEnd())
	{
		kdDebug(0) << " Lexer::lex(), got EOF. " << endl;
		currentToken.type = tokEOF;
		currentToken.look = " EOF ";
		ungetChar(currentChar); // put the character back so the row/col values are restored
		return currentToken;
	}

	if (currentChar == '#')
	{
		// comment: swallow the rest of the line, the line break itself is handled below
		while (!inputStream->atEnd() && !(currentChar == '\x0a' || currentChar == '\n'))
			currentChar = getChar();
	}

	// if (currentChar.category() == TQChar::Separator_Line) somehow does not work
	if (currentChar == '\x0a' || currentChar == '\n')
	{
		currentToken.type = tokEOL;
		currentToken.look = " EOL ";
	}
	else if (currentChar.isLetter() || currentChar == '[' || currentChar == ']')
	{
		ungetChar(currentChar);
		// getWord() fills currentToken.look by reference and yields tokUnknown ...
		currentToken.type = getWord(currentToken.look);
		setTokenType(currentToken); // ... which is resolved to the actual token type here
	}
	else if (currentChar.isNumber())
	{
		ungetChar(currentChar);
		// getNumber() fills currentToken.value and currentToken.look by reference
		currentToken.type = getNumber(currentToken.value, currentToken.look);
	}
	else if (currentChar == '>')
	{
		currentChar = getChar();
		if (currentChar == '=')
		{
			currentToken.type = tokGe;
			currentToken.look = " >= ";
		}
		else
		{
			ungetChar(currentChar); // the look-ahead belongs to the next token
			currentToken.type = tokGt;
			currentToken.look = " > ";
		}
	}
	else if (currentChar == '<')
	{
		currentChar = getChar();
		if (currentChar == '=')
		{
			currentToken.type = tokLe;
			currentToken.look = " <= ";
		}
		else
		{
			ungetChar(currentChar); // the look-ahead belongs to the next token
			currentToken.type = tokLt;
			currentToken.look = " < "; // was the '>' look, copied from the branch above
		}
	}
	else if (currentChar == '!')
	{
		currentChar = getChar();
		if (currentChar == '=')
		{
			currentToken.type = tokNe;
			currentToken.look = " != ";
		}
		else
		{
			ungetChar(currentChar); // the look-ahead belongs to the next token
			currentToken.type = tokNot;
			currentToken.look = " ! ";
		}
	}
	else if (currentChar == '=')
	{
		currentChar = getChar();
		if (currentChar == '=')
		{
			currentToken.type = tokEq;
			currentToken.look = " == ";
		}
		else
		{
			ungetChar(currentChar); // the look-ahead belongs to the next token
			currentToken.type = tokAssign;
			currentToken.look = " = ";
		}
	}
	else if (currentChar == '(')
	{
		currentToken.type = tokBraceOpen;
		currentToken.look = " ( ";
	}
	else if (currentChar == ')')
	{
		currentToken.type = tokBraceClose;
		currentToken.look = " ) ";
	}
	else if (currentChar == '+')
	{
		currentToken.type = tokPlus;
		currentToken.look = " + ";
	}
	else if (currentChar == '-')
	{
		currentToken.type = tokMinus;
		currentToken.look = " - ";
	}
	else if (currentChar == '*')
	{
		currentToken.type = tokMul;
		currentToken.look = " * ";
	}
	else if (currentChar == '/')
	{
		currentToken.type = tokDev;
		currentToken.look = " / ";
	}
	else if (currentChar == ',')
	{
		currentToken.type = tokComma;
		currentToken.look = " , ";
	}
	else if (currentChar == '"')
	{
		getString(currentToken); // sets type and look itself
	}
	else
	{
		currentToken.type = tokUnknown;
		currentToken.look = currentChar;
	}

	currentToken.end.row = row;
	currentToken.end.col = col;
	return currentToken;
}
// PRIVATEs
/**
 * Delivers the next character and advances the row/col bookkeeping.
 *
 * A character stored by ungetChar() takes precedence over the stream and
 * the pushback slot is cleared once it has been handed out. Otherwise one
 * character is extracted from the input stream.
 *
 * @return the character that was taken
 */
TQChar Lexer::getChar()
{
	TQChar next;

	if (putBackChar.isNull())
	{
		*inputStream >> next; // nothing pending: read from the stream
	}
	else
	{
		next = putBackChar;     // hand out the pending character ...
		putBackChar = TQChar(); // ... and empty the pushback slot
	}

	// the position update is the same for both sources
	const bool lineBreak = (next == '\x0a' || next == '\n');
	if (lineBreak)
	{
		++row;
		prevCol = col; // remembered so that ungetChar() can step back over the line break
		col = 1;
	}
	else
	{
		++col;
	}

	return next;
}
void Lexer : : ungetChar ( TQChar c )
{
if ( c = = ' \x0a ' | | c = = ' \n ' )
{
row - - ;
col = prevCol ;
}
else
{
col - - ;
}
putBackChar = c ;
// kdDebug(0)<<"Lexer::ungetChar(), saved char: '"<<c<<"' and steped back to ("<<row<<", "<<col<<")"<<endl;
}
int Lexer : : getWord ( TQString & word )
{
// kdDebug(0)<<"Lexer::getWord()"<<endl;
TQChar currentChar = getChar ( ) ;
if ( currentChar . isLetter ( ) | | currentChar = = ' [ ' | | currentChar = = ' ] ' ) {
while ( ( currentChar . isLetterOrNumber ( ) | | currentChar = = ' _ ' | | currentChar = = ' [ ' | | currentChar = = ' ] ' ) & & ! inputStream - > atEnd ( ) )
{
word + = currentChar ;
currentChar = getChar ( ) ;
}
kdDebug ( 0 ) < < " Lexer::getWord(), got NAME: ' " < < word < < " ' " < < endl ;
ungetChar ( currentChar ) ; //read one too much
return tokUnknown ; // returns tokUnknown, actual token is to be determained later in Lexer::setTokenType
}
else return tokError ;
}
void Lexer : : setTokenType ( Token & currentToken )
{
if ( currentToken . type = = tokUnknown )
{
// make lowercase copy of the word as it was found in the inputStream
TQString k = currentToken . look . lower ( ) ;
// if the key is an alias translate that alias to a key
if ( ! translate - > alias2key ( k ) . isEmpty ( ) ) k = translate - > alias2key ( k ) ;
if ( k = = translate - > name2key ( " begin " ) ) currentToken . type = tokBegin ;
else if ( k = = translate - > name2key ( " end " ) ) currentToken . type = tokEnd ;
else if ( k = = translate - > name2key ( " while " ) ) currentToken . type = tokWhile ;
else if ( k = = translate - > name2key ( " if " ) ) currentToken . type = tokIf ;
else if ( k = = translate - > name2key ( " else " ) ) currentToken . type = tokElse ;
else if ( k = = translate - > name2key ( " for " ) ) currentToken . type = tokFor ;
else if ( k = = translate - > name2key ( " to " ) ) currentToken . type = tokTo ;
else if ( k = = translate - > name2key ( " step " ) ) currentToken . type = tokStep ;
else if ( k = = translate - > name2key ( " and " ) ) currentToken . type = tokAnd ;
else if ( k = = translate - > name2key ( " or " ) ) currentToken . type = tokOr ;
else if ( k = = translate - > name2key ( " not " ) ) currentToken . type = tokNot ;
else if ( k = = translate - > name2key ( " return " ) ) currentToken . type = tokReturn ;
else if ( k = = translate - > name2key ( " break " ) ) currentToken . type = tokBreak ;
else if ( k = = translate - > name2key ( " run " ) ) currentToken . type = tokRun ;
else if ( k = = translate - > name2key ( " foreach " ) ) currentToken . type = tokForEach ;
else if ( k = = translate - > name2key ( " in " ) ) currentToken . type = tokIn ;
else if ( k = = translate - > name2key ( " learn " ) ) currentToken . type = tokLearn ;
else if ( k = = translate - > name2key ( " clear " ) ) currentToken . type = tokClear ;
else if ( k = = translate - > name2key ( " go " ) ) currentToken . type = tokGo ;
else if ( k = = translate - > name2key ( " gox " ) ) currentToken . type = tokGoX ;
else if ( k = = translate - > name2key ( " goy " ) ) currentToken . type = tokGoY ;
else if ( k = = translate - > name2key ( " forward " ) ) currentToken . type = tokForward ;
else if ( k = = translate - > name2key ( " backward " ) ) currentToken . type = tokBackward ;
else if ( k = = translate - > name2key ( " direction " ) ) currentToken . type = tokDirection ;
else if ( k = = translate - > name2key ( " turnleft " ) ) currentToken . type = tokTurnLeft ;
else if ( k = = translate - > name2key ( " turnright " ) ) currentToken . type = tokTurnRight ;
else if ( k = = translate - > name2key ( " center " ) ) currentToken . type = tokCenter ;
else if ( k = = translate - > name2key ( " setpenwidth " ) ) currentToken . type = tokSetPenWidth ;
else if ( k = = translate - > name2key ( " penup " ) ) currentToken . type = tokPenUp ;
else if ( k = = translate - > name2key ( " pendown " ) ) currentToken . type = tokPenDown ;
else if ( k = = translate - > name2key ( " setfgcolor " ) ) currentToken . type = tokSetFgColor ;
else if ( k = = translate - > name2key ( " setbgcolor " ) ) currentToken . type = tokSetBgColor ;
else if ( k = = translate - > name2key ( " resizecanvas " ) ) currentToken . type = tokResizeCanvas ;
else if ( k = = translate - > name2key ( " spriteshow " ) ) currentToken . type = tokSpriteShow ;
else if ( k = = translate - > name2key ( " spritehide " ) ) currentToken . type = tokSpriteHide ;
else if ( k = = translate - > name2key ( " spritepress " ) ) currentToken . type = tokSpritePress ;
else if ( k = = translate - > name2key ( " spritechange " ) ) currentToken . type = tokSpriteChange ;
else if ( k = = translate - > name2key ( " do " ) ) currentToken . type = tokDo ; // dummy commands
else if ( k = = translate - > name2key ( " message " ) ) currentToken . type = tokMessage ;
else if ( k = = translate - > name2key ( " inputwindow " ) ) currentToken . type = tokInputWindow ;
else if ( k = = translate - > name2key ( " print " ) ) currentToken . type = tokPrint ;
else if ( k = = translate - > name2key ( " fonttype " ) ) currentToken . type = tokFontType ;
else if ( k = = translate - > name2key ( " fontsize " ) ) currentToken . type = tokFontSize ;
else if ( k = = translate - > name2key ( " repeat " ) ) currentToken . type = tokRepeat ;
else if ( k = = translate - > name2key ( " random " ) ) currentToken . type = tokRandom ;
else if ( k = = translate - > name2key ( " wait " ) ) currentToken . type = tokWait ;
else if ( k = = translate - > name2key ( " wrapon " ) ) currentToken . type = tokWrapOn ;
else if ( k = = translate - > name2key ( " wrapoff " ) ) currentToken . type = tokWrapOff ;
else if ( k = = translate - > name2key ( " reset " ) ) currentToken . type = tokReset ;
else
{
kdDebug ( 0 ) < < " Lexer::setTokenType, found UNKNOWN word @ ( " < < currentToken . start . row < < " , " < < currentToken . start . col < < " ), can be anything " < < endl ;
// t.type = tokUnknown; is allready
}
kdDebug ( 0 ) < < " Lexer::setTokenType, found tok-number: ' " < < currentToken . type < < " ' with the key: ' " < < k < < " ' @ ( " < < currentToken . start . row < < " , " < < currentToken . start . col < < " ) " < < endl ;
}
}
void Lexer : : skipSpaces ( )
{
// kdDebug(0)<<"Lexer::skipSpaces(), skipping SPACES."<<endl;
TQChar currentChar = getChar ( ) ;
// when the Separator_* groups can be identified in the TQChar thing would be easier
while ( ! inputStream - > atEnd ( ) & & ( currentChar . isSpace ( ) & & ! ( currentChar = = ' \x0a ' | | currentChar = = ' \n ' ) ) )
{
currentChar = getChar ( ) ;
}
ungetChar ( currentChar ) ; // unget the tokEOL we likely just found
}
int Lexer : : getNumber ( Value & num , TQString & look )
{
// by reference the value (Value) and look part are set
// kdDebug(0)<<"Lexer::getNumber()"<<endl;
bool hasPoint = false ;
TQChar currentChar = getChar ( ) ;
if ( currentChar . isNumber ( ) )
{
while ( ( currentChar . isNumber ( ) | | ( currentChar = = ' . ' & & ! hasPoint ) ) & & ! inputStream - > atEnd ( ) )
{
if ( currentChar = = ' . ' )
{
hasPoint = true ;
}
look + = currentChar ;
currentChar = getChar ( ) ;
}
ungetChar ( currentChar ) ; //read one too much
num . setNumber ( look . toDouble ( ) ) ;
kdDebug ( 0 ) < < " Lexer::getNumber(), got NUMBER: ' " < < num . Number ( ) < < " ' " < < endl ;
return tokNumber ;
}
else return tokError ;
}
void Lexer : : getString ( Token & currentToken )
{
TQString str = " \" " ; // start with a " cauz it just got lost
TQChar currentChar = TQChar ( ) ; // start empty
while ( currentChar ! = ' " ' & & ! ( currentChar = = ' \x0a ' | | currentChar = = ' \n ' ) & & ! inputStream - > atEnd ( ) )
{
currentChar = getChar ( ) ;
if ( currentChar = = ' \\ ' ) // escape sequence
{
currentChar = getChar ( ) ;
switch ( currentChar )
{
case ' n ' : str + = ' \n ' ; break ;
case ' t ' : str + = ' \t ' ; break ;
case ' f ' : str + = ' \f ' ; break ;
case ' " ' : str + = ' " ' ; break ;
}
}
else if ( currentChar = = ' \x0a ' | | currentChar = = ' \n ' ) // if the user forgot to delimit the string
{
ungetChar ( currentChar ) ;
break ;
}
else str + = currentChar ;
}
currentToken . type = tokString ;
currentToken . look = str ;
kdDebug ( 0 ) < < " Lexer::getStringConstant, got STRINGCONSTANT: " < < currentToken . look < < " ' " < < endl ;
}