You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
412 lines
13 KiB
C++
412 lines
13 KiB
C++
/*
|
|
Copyright (C) 2003 by Walter Schreppers
|
|
Copyright (C) 2004 by Cies Breijs
|
|
|
|
This program is free software; you can redistribute it and/or
|
|
modify it under the terms of version 2 of the GNU General Public
|
|
License as published by the Free Software Foundation.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program; if not, write to the Free Software
|
|
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
|
*/
|
|
|
|
#include <tqdom.h>
|
|
#include <tqfile.h>
|
|
|
|
#include <kdebug.h>
|
|
#include <tdelocale.h>
|
|
|
|
#include "settings.h"
|
|
|
|
#include "lexer.h"
|
|
|
|
|
|
// Builds a lexer that reads its characters from iStream.
//
// Only the address of the stream is stored, so the stream has to outlive
// the lexer. Position tracking starts at row 1, column 1.
// NOTE(review): the Translate object is allocated with new here and no
// matching delete is visible in this part of the file -- confirm that the
// destructor releases it.
Lexer::Lexer(TQTextIStream& iStream)
{
	// all position counters start on the first row / first column
	row = col = prevCol = 1;

	translate   = new Translate();
	inputStream = &iStream;
}
|
|
|
|
|
|
// Reads the next token from the input stream and returns it.
//
// Leading white space (but not line breaks) is skipped first. The returned
// token carries:
//   - type:  one of the tok* constants,
//   - look:  the text of the token as it is to be shown ("EOF"/"EOL" for the
//            end-of-file and end-of-line tokens),
//   - value: only filled in for numbers (see getNumber),
//   - start/end: the row/col position before and after the token was read.
//
// A '#' starts a comment that runs up to the end of the line; the comment is
// consumed and the terminating line break is reported as tokEOL.
// Words (keywords, names) are classified by setTokenType(); a character that
// fits no rule is returned as tokUnknown with the character as its look.
//
// NOTE(review): on EOF the function returns early, so end.row/end.col of the
// EOF token are not assigned here.
Token Lexer::lex()
{
	skipSpaces(); // skip the white space (most likely indentation) in front of the token

	Token currentToken;
	currentToken.type = tokNotSet; // not really needed
	currentToken.look = "";
	currentToken.value = 0;
	currentToken.start.row = row;
	currentToken.start.col = col;

	TQChar currentChar = getChar();

	if ( inputStream->atEnd() )
	{
		kdDebug(0)<<"Lexer::lex(), got EOF."<<endl;
		currentToken.type = tokEOF;
		currentToken.look = "EOF";
		ungetChar(currentChar); // unget the currentChar and fix the row/col values
		return currentToken;
	}

	// a comment: swallow everything up to (and including) the line break
	if (currentChar == '#')
	{
		while ( !inputStream->atEnd() && !(currentChar == '\x0a' || currentChar == '\n') )
			currentChar = getChar();
	}

	// if (currentChar.category() == TQChar::Separator_Line) somehow doesnt work
	if (currentChar == '\x0a' || currentChar == '\n')
	{
		currentToken.type = tokEOL;
		currentToken.look = "EOL";
	}
	else if (currentChar.isLetter() || currentChar == '[' || currentChar == ']')
	{
		ungetChar(currentChar);
		// sets currentToken.look by reference, and sets the currentToken.type to tokUnknown
		currentToken.type = getWord(currentToken.look);
		setTokenType(currentToken); // gets the actual tokenType
	}
	else if ( currentChar.isNumber() )
	{
		ungetChar(currentChar);
		// sets currentToken.value/look by reference, and sets the currentToken.type to tokNumber
		currentToken.type = getNumber(currentToken.value, currentToken.look);
	}
	else if (currentChar == '>')
	{
		// one character of look-ahead decides between ">=" and ">"
		currentChar = getChar();
		if (currentChar == '=')
		{
			currentToken.type = tokGe;
			currentToken.look = ">=";
		}
		else
		{
			ungetChar(currentChar);
			currentToken.type = tokGt;
			currentToken.look = ">";
		}
	}
	else if (currentChar == '<')
	{
		// one character of look-ahead decides between "<=" and "<"
		currentChar = getChar();
		if ( currentChar == '=' )
		{
			currentToken.type = tokLe;
			currentToken.look = "<=";
		}
		else
		{
			ungetChar(currentChar);
			currentToken.type = tokLt;
			currentToken.look = "<"; // was ">" (copy/paste slip from the tokGt branch)
		}
	}
	else if (currentChar == '!')
	{
		// one character of look-ahead decides between "!=" and "!"
		currentChar = getChar();
		if (currentChar == '=')
		{
			currentToken.type = tokNe;
			currentToken.look = "!=";
		}
		else
		{
			ungetChar(currentChar);
			currentToken.type = tokNot;
			currentToken.look = "!";
		}
	}
	else if (currentChar == '=')
	{
		// one character of look-ahead decides between "==" and "="
		currentChar = getChar();
		if (currentChar == '=')
		{
			currentToken.type = tokEq;
			currentToken.look = "==";
		}
		else
		{
			ungetChar(currentChar);
			currentToken.type = tokAssign;
			currentToken.look = "=";
		}
	}
	else if (currentChar == '(')
	{
		currentToken.type = tokBraceOpen;
		currentToken.look = "(";
	}
	else if (currentChar == ')')
	{
		currentToken.type = tokBraceClose;
		currentToken.look = ")";
	}
	else if (currentChar == '+')
	{
		currentToken.type = tokPlus;
		currentToken.look = "+";
	}
	else if (currentChar == '-')
	{
		currentToken.type = tokMinus;
		currentToken.look = "-";
	}
	else if (currentChar == '*')
	{
		currentToken.type = tokMul;
		currentToken.look = "*";
	}
	else if (currentChar == '/')
	{
		currentToken.type = tokDev;
		currentToken.look = "/";
	}
	else if (currentChar == ',')
	{
		currentToken.type = tokComma;
		currentToken.look = ",";
	}
	else if (currentChar == '"')
	{
		getString(currentToken); // fills in type and look
	}
	else
	{
		currentToken.type = tokUnknown;
		currentToken.look = currentChar;
	}

	currentToken.end.row = row;
	currentToken.end.col = col;
	return currentToken;
}
|
|
|
|
|
|
|
|
// PRIVATEs
|
|
|
|
// Hands out the next character and advances the row/col bookkeeping.
//
// A character that was stored by ungetChar() is returned first (and the
// store is emptied); otherwise one character is taken from the input stream.
// On a line break the row is incremented, the old column is remembered in
// prevCol (ungetChar() needs it to step back) and col restarts at 1; any
// other character just moves col one to the right.
TQChar Lexer::getChar()
{
	TQChar next;
	const bool havePending = !putBackChar.isNull();
	if (havePending)
	{
		next = putBackChar;      // use the char that is stored to be put back
		putBackChar = TQChar();  // and mark the store as empty again
	}
	else
	{
		*inputStream >> next;    // take a TQChar off the inputStream
	}

	// the position update is the same no matter where the char came from
	const bool lineBreak = (next == '\x0a' || next == '\n');
	if (!lineBreak)
	{
		col++;
		return next;
	}

	row++;
	prevCol = col;
	col = 1;
	return next;
}
|
|
|
|
// Pushes c back so that the next getChar() returns it again, and undoes the
// row/col step that getChar() made for it.
//
// Only a single character is remembered (putBackChar); a second call before
// the next getChar() overwrites the first one.
void Lexer::ungetChar(TQChar c)
{
	putBackChar = c;

	const bool lineBreak = (c == '\x0a' || c == '\n');
	if (lineBreak)
	{
		// step back onto the previous line, at the column saved by getChar()
		row--;
		col = prevCol;
		return;
	}
	col--;
}
|
|
|
|
// Reads a word from the input and appends it to 'word'.
//
// A word has to start with a letter, '[' or ']'; after that letters, digits,
// '_', '[' and ']' are accepted. The first character that does not belong to
// the word is put back with ungetChar().
//
// Returns tokUnknown when a word was read (the real type is determined later
// in Lexer::setTokenType), or tokError when the first character cannot start
// a word (that character is consumed and not put back).
//
// NOTE(review): collecting stops as soon as the stream reports atEnd(), before
// the character just read is appended -- confirm this is intended for input
// that does not end with a line break.
int Lexer::getWord(TQString& word)
{
	TQChar ch = getChar();
	if ( !( ch.isLetter() || ch == '[' || ch == ']' ) ) return tokError;

	for (;;)
	{
		const bool wordChar = ch.isLetterOrNumber() || ch == '_' || ch == '[' || ch == ']';
		if ( !wordChar || inputStream->atEnd() ) break;
		word += ch;
		ch = getChar();
	}

	kdDebug(0)<<"Lexer::getWord(), got NAME: '"<<word<<"'"<<endl;
	ungetChar(ch); // read one too much
	return tokUnknown;
}
|
|
|
|
// Determines the real token type of a word that getWord() returned as
// tokUnknown.
//
// The look of the token is lower-cased, translated from an alias to its key
// when translate->alias2key() knows it, and then compared against the
// translated key of every known command (translate->name2key()). The first
// match, in table order, sets currentToken.type. When nothing matches the
// type stays tokUnknown. Tokens whose type is not tokUnknown are left
// untouched.
void Lexer::setTokenType(Token& currentToken)
{
	if (currentToken.type != tokUnknown) return;

	// untranslated command name -> token type; the order is the lookup order
	struct KeywordEntry
	{
		const char* name;
		int         type;
	};
	static const KeywordEntry keywords[] =
	{
		{ "begin",        tokBegin },
		{ "end",          tokEnd },
		{ "while",        tokWhile },
		{ "if",           tokIf },
		{ "else",         tokElse },
		{ "for",          tokFor },
		{ "to",           tokTo },
		{ "step",         tokStep },
		{ "and",          tokAnd },
		{ "or",           tokOr },
		{ "not",          tokNot },
		{ "return",       tokReturn },
		{ "break",        tokBreak },
		{ "run",          tokRun },
		{ "foreach",      tokForEach },
		{ "in",           tokIn },

		{ "learn",        tokLearn },

		{ "clear",        tokClear },
		{ "go",           tokGo },
		{ "gox",          tokGoX },
		{ "goy",          tokGoY },
		{ "forward",      tokForward },
		{ "backward",     tokBackward },
		{ "direction",    tokDirection },
		{ "turnleft",     tokTurnLeft },
		{ "turnright",    tokTurnRight },
		{ "center",       tokCenter },
		{ "setpenwidth",  tokSetPenWidth },
		{ "penup",        tokPenUp },
		{ "pendown",      tokPenDown },
		{ "setfgcolor",   tokSetFgColor },
		{ "setbgcolor",   tokSetBgColor },
		{ "resizecanvas", tokResizeCanvas },
		{ "spriteshow",   tokSpriteShow },
		{ "spritehide",   tokSpriteHide },
		{ "spritepress",  tokSpritePress },
		{ "spritechange", tokSpriteChange },

		{ "do",           tokDo }, // dummy commands

		{ "message",      tokMessage },
		{ "inputwindow",  tokInputWindow },
		{ "print",        tokPrint },
		{ "fonttype",     tokFontType },
		{ "fontsize",     tokFontSize },
		{ "repeat",       tokRepeat },
		{ "random",       tokRandom },
		{ "wait",         tokWait },
		{ "wrapon",       tokWrapOn },
		{ "wrapoff",      tokWrapOff },
		{ "reset",        tokReset }
	};
	const unsigned int keywordCount = sizeof(keywords) / sizeof(keywords[0]);

	// make a lowercase copy of the word as it was found in the inputStream
	TQString k = currentToken.look.lower();

	// if the word is an alias, translate that alias to a key (looked up once)
	const TQString aliasedKey = translate->alias2key(k);
	if ( !aliasedKey.isEmpty() ) k = aliasedKey;

	bool matched = false;
	for (unsigned int i = 0; i < keywordCount; ++i)
	{
		if ( k == translate->name2key(keywords[i].name) )
		{
			currentToken.type = keywords[i].type;
			matched = true;
			break;
		}
	}

	if (!matched)
	{
		// the type stays tokUnknown
		kdDebug(0)<<"Lexer::setTokenType, found UNKNOWN word @ ("<<currentToken.start.row<<", "<<currentToken.start.col<<"), can be anything"<<endl;
	}

	kdDebug(0)<<"Lexer::setTokenType, found tok-number: '"<<currentToken.type<<"' with the key: '"<<k<<"' @ ("<<currentToken.start.row<<", "<<currentToken.start.col<<")"<<endl;
}
|
|
|
|
|
|
void Lexer::skipSpaces()
|
|
{
|
|
// kdDebug(0)<<"Lexer::skipSpaces(), skipping SPACES."<<endl;
|
|
TQChar currentChar = getChar();
|
|
// when the Separator_* groups can be identified in the TQChar thing would be easier
|
|
while ( !inputStream->atEnd() && ( currentChar.isSpace() && !(currentChar == '\x0a' || currentChar == '\n') ) )
|
|
{
|
|
currentChar = getChar();
|
|
}
|
|
ungetChar(currentChar); // unget the tokEOL we likely just found
|
|
}
|
|
|
|
|
|
// Reads a number from the input.
//
// The digits (and at most one '.') are appended to 'look'; 'num' is set to
// look.toDouble(). The first character that is not part of the number is put
// back with ungetChar().
//
// Returns tokNumber on success, or tokError when the first character is not
// a digit (that character is consumed and not put back).
//
// NOTE(review): collecting stops as soon as the stream reports atEnd(), before
// the character just read is appended -- confirm this is intended for input
// that does not end with a line break.
int Lexer::getNumber(Value& num, TQString& look)
{
	TQChar ch = getChar();
	if ( !ch.isNumber() ) return tokError;

	bool seenPoint = false;
	for (;;)
	{
		const bool isPoint = (ch == '.');
		// only digits and the very first decimal point belong to the number
		if ( !ch.isNumber() && !(isPoint && !seenPoint) ) break;
		if ( inputStream->atEnd() ) break;

		if (isPoint) seenPoint = true;
		look += ch;
		ch = getChar();
	}

	ungetChar(ch); // read one too much
	num.setNumber( look.toDouble() );
	kdDebug(0)<<"Lexer::getNumber(), got NUMBER: '"<<num.Number()<<"'"<<endl;
	return tokNumber;
}
|
|
|
|
// Reads a string constant; called after the opening '"' was consumed.
//
// Sets currentToken.type to tokString and currentToken.look to the string
// including its delimiting quotes. Reading stops at the closing '"', at a
// line break (the user forgot to close the string; the line break is put
// back so that it is still seen as end of line) or at the end of the input.
//
// Escape sequences: \n, \t, \f and \" are replaced by the character they
// stand for; a backslash followed by any other character is dropped together
// with that character.
//
// Fixes compared to the previous implementation:
//   - an escaped quote (\") no longer terminates the string; termination is
//     tracked with a flag instead of re-testing the last character read,
//   - a backslash directly in front of a line break no longer swallows the
//     line break; it is put back like any other line break inside a string.
void Lexer::getString(Token& currentToken)
{
	TQString str = "\""; // start with a " cauz it just got lost
	bool finished = false;

	while ( !finished && !inputStream->atEnd() )
	{
		TQChar currentChar = getChar();

		if (currentChar == '\\') // escape sequence
		{
			currentChar = getChar();
			if (currentChar == '\x0a' || currentChar == '\n')
			{
				// unterminated string: keep the line break for the next lex()
				ungetChar(currentChar);
				finished = true;
			}
			else
			{
				switch (currentChar)
				{
					case 'n': str += '\n'; break;
					case 't': str += '\t'; break;
					case 'f': str += '\f'; break;
					case '"': str += '"';  break; // part of the string, not its end
				}
			}
		}
		else if (currentChar == '\x0a' || currentChar == '\n') // if the user forgot to delimit the string
		{
			ungetChar(currentChar);
			finished = true;
		}
		else
		{
			str += currentChar;
			if (currentChar == '"') finished = true; // the closing quote
		}
	}

	currentToken.type = tokString;
	currentToken.look = str;

	kdDebug(0)<<"Lexer::getStringConstant, got STRINGCONSTANT: "<<currentToken.look<<"'"<<endl;
}
|
|
|