/* This file is part of TDevelop Copyright (C) 2002,2003 Roberto Raggi This library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public License for more details. You should have received a copy of the GNU Library General Public License along with this library; see the file COPYING.LIB. If not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ #ifndef LEXER_H #define LEXER_H #include "driver.h" #include #include #include #include #include #include #include #define CHARTYPE TQChar enum Type { Token_eof = 0, Token_identifier = 1000, Token_number_literal, Token_char_literal, Token_string_literal, Token_whitespaces, Token_comment, Token_preproc, Token_assign = 2000, Token_ptrmem, Token_ellipsis, Token_scope, Token_shift, Token_eq, Token_leq, Token_geq, Token_incr, Token_decr, Token_arrow, Token_concat, Token_K_DCOP, Token_k_dcop, Token_k_dcop_signals, Token_Q_OBJECT, Token_, Token_signals, Token_slots, Token_emit, Token_foreach, Token_and, Token_and_eq, Token_asm, Token_auto, Token_bitand, Token_bitor, Token_bool, Token_break, Token_case, Token_catch, Token_char, Token_class, Token_compl, Token_const, Token_const_cast, Token_continue, Token_default, Token_delete, Token_do, Token_double, Token_dynamic_cast, Token_else, Token_enum, Token_explicit, Token_export, Token_extern, Token_false, Token_float, Token_for, Token_friend, Token_goto, Token_if, Token_inline, Token_int, Token_long, Token_mutable, Token_namespace, Token_new, Token_not, Token_not_eq, Token_operator, Token_or, Token_or_eq, Token_private, Token_protected, Token_public, Token_register, Token_reinterpret_cast, Token_return, Token_short, Token_signed, Token_sizeof, Token_static, Token_static_cast, Token_struct, Token_switch, Token_template, Token_this, Token_throw, Token_true, Token_try, Token_typedef, Token_typeid, Token_typename, Token_union, Token_unsigned, Token_using, Token_virtual, Token_void, Token_volatile, Token_wchar_t, Token_while, Token_xor, Token_xor_eq }; enum SkipType { SkipWord, SkipWordAndArguments }; struct LexerData; class Token { Token(const TQString &); Token( int type, int position, int length, const TQString& text ); Token( const Token& source ); Token& operator = ( const Token& source ); bool operator == ( const Token& token ) const; operator int () const; public: bool isNull() const; int type() const; void setType( int type ); void getStartPosition( int* line, int* column ) const; void setStartPosition( int line, int column ); void getEndPosition( int* line, int* column ) const; void setEndPosition( int line, int column ); unsigned int length() const; void setLength( unsigned int length ); int position() const; void setPosition( int position ); TQString text() const; private: int m_type; int m_position; int m_length; int m_startLine; int m_startColumn; int m_endLine; int m_endColumn; const TQString & m_text; friend class Lexer; friend class Parser; }; // class Token class Lexer { public: Lexer( Driver* driver ); ~Lexer(); bool recordComments() const; void setRecordComments( bool record ); bool recordWhiteSpaces() const; void setRecordWhiteSpaces( bool record ); bool reportWarnings() const; void setReportWarnings( bool enable ); bool reportMessages() const; void setReportMessages( bool enable ); bool skipWordsEnabled() const; void setSkipWordsEnabled( bool enabled ); bool preprocessorEnabled() const; void setPreprocessorEnabled( bool enabled ); void resetSkipWords(); void addSkipWord( const TQString& word, SkipType skipType=SkipWord, const TQString& str = TQString() ); TQString source() const; void setSource( const TQString& source ); int index() const; void setIndex( int index ); //returns the count of lines that wer skipped due to #ifdef's int skippedLines() const; void reset(); const Token& tokenAt( int position ) const; const Token& nextToken(); const Token& lookAhead( int n ) const; static int toInt( const Token& token ); int tokenPosition( const Token& token ) const; void getTokenPosition( const Token& token, int* line, int* col ); int currentLine() const { return m_currentLine; } int currentColumn() const { return m_currentColumn; } inline const CHARTYPE* offset( int offset ) const { return m_source.unicode() + offset; } inline int getOffset( const TQChar* p ) const { return int(p - (m_source.unicode())); } private: void setEndPtr( const TQChar* c ) { m_endPtr = c; if( m_ptr < m_endPtr ) m_currentChar = *m_ptr; else m_currentChar = TQChar::null; } const TQChar currentChar() const; TQChar peekChar( int n=1 ) const; int currentPosition() const; void insertCurrent( const TQString& str ); void tokenize(); void nextToken( Token& token, bool stopOnNewline=false ); void nextChar(); void nextChar( int n ); void skip( int l, int r ); void readIdentifier(); void readWhiteSpaces( bool skipNewLine=true, bool skipOnlyOnce=false ); void readLineComment(); void readMultiLineComment(); void readCharLiteral(); void readStringLiteral(); void readNumberLiteral(); int findOperator3() const; int findOperator2() const; bool eof() const; // preprocessor (based on an article of Al Stevens on Dr.Dobb's journal) int testIfLevel(); int macroDefined(); TQString readArgument(); int macroPrimary(); int macroMultiplyDivide(); int macroAddSubtract(); int macroRelational(); int macroEquality(); int macroBoolAnd(); int macroBoolXor(); int macroBoolOr(); int macroLogicalAnd(); int macroLogicalOr(); int macroExpression(); void handleDirective( const TQString& directive ); void processDefine( Macro& macro ); void processElse(); void processElif(); void processEndif(); void processIf(); void processIfdef(); void processIfndef(); void processInclude(); void processUndef(); private: LexerData* d; Driver* m_driver; TQPtrVector< Token > m_tokens; int m_size; int m_index; TQString m_source; const TQChar* m_ptr; const TQChar* m_endPtr; TQChar m_currentChar; bool m_recordComments; bool m_recordWhiteSpaces; bool m_startLine; __gnu_cxx::hash_map< HashedString, TQPair > m_words; int m_skippedLines; int m_currentLine; int m_currentColumn; bool m_skipWordsEnabled; // preprocessor TQMemArray m_skipping; TQMemArray m_trueTest; int m_ifLevel; bool m_preprocessorEnabled; bool m_inPreproc; bool m_reportWarnings; bool m_reportMessages; private: Lexer( const Lexer& source ); void operator = ( const Lexer& source ); }; inline Token::Token(const TQString & text) : m_type( -1 ), m_position( 0 ), m_length( 0 ), m_text( text ) { } inline Token::Token( int type, int position, int length, const TQString& text ) : m_type( type ), m_position( position ), m_length( length ), m_text( text ) { } inline Token::Token( const Token& source ) : m_type( source.m_type ), m_position( source.m_position ), m_length( source.m_length ), m_startLine( source.m_startLine ), m_startColumn( source.m_startColumn ), m_endLine( source.m_endLine ), m_endColumn( source.m_endColumn ), m_text( source.m_text ) { } inline Token& Token::operator = ( const Token& source ) { m_type = source.m_type; m_position = source.m_position; m_length = source.m_length; m_startLine = source.m_startLine; m_startColumn = source.m_startColumn; m_endLine = source.m_endLine; m_endColumn = source.m_endColumn; // m_text = source.m_text; return( *this ); } inline Token::operator int () const { return m_type; } inline bool Token::operator == ( const Token& token ) const { return m_type == token.m_type && m_position == token.m_position && m_length == token.m_length && m_startLine == token.m_startLine && m_startColumn == token.m_startColumn && m_endLine == token.m_endLine && m_endColumn == token.m_endColumn && m_text == token.m_text; } inline bool Token::isNull() const { return m_type == Token_eof || m_length == 0; } inline int Token::type() const { return m_type; } inline void Token::setType( int type ) { m_type = type; } inline int Token::position() const { return m_position; } inline TQString Token::text() const { return m_text.mid(m_position, m_length); } inline void Token::setStartPosition( int line, int column ) { m_startLine = line; m_startColumn = column; } inline void Token::setEndPosition( int line, int column ) { m_endLine = line; m_endColumn = column; } inline void Token::getStartPosition( int* line, int* column ) const { if( line ) *line = m_startLine; if( column ) *column = m_startColumn; } inline void Token::getEndPosition( int* line, int* column ) const { if( line ) *line = m_endLine; if( column ) *column = m_endColumn; } inline void Token::setPosition( int position ) { m_position = position; } inline unsigned int Token::length() const { return m_length; } inline void Token::setLength( unsigned int length ) { m_length = length; } inline bool Lexer::recordComments() const { return m_recordComments; } inline void Lexer::setRecordComments( bool record ) { m_recordComments = record; } inline bool Lexer::recordWhiteSpaces() const { return m_recordWhiteSpaces; } inline void Lexer::setRecordWhiteSpaces( bool record ) { m_recordWhiteSpaces = record; } inline TQString Lexer::source() const { return m_source; } inline int Lexer::index() const { return m_index; } inline void Lexer::setIndex( int index ) { m_index = index; } inline const Token& Lexer::nextToken() { if( m_index < m_size ) return *m_tokens[ m_index++ ]; return *m_tokens[ m_index ]; } inline const Token& Lexer::tokenAt( int n ) const { return *m_tokens[ TQMIN(n, m_size-1) ]; } inline const Token& Lexer::lookAhead( int n ) const { return *m_tokens[ TQMIN(m_index + n, m_size-1) ]; } inline int Lexer::tokenPosition( const Token& token ) const { return token.position(); } inline void Lexer::nextChar() { if(*m_ptr == '\n') { ++m_currentLine; m_currentColumn = 0; m_startLine = true; } else { ++m_currentColumn; } ++m_ptr; if( m_ptr < m_endPtr ) m_currentChar = *m_ptr; else m_currentChar = TQChar::null; } inline void Lexer::nextChar( int n ) { m_currentColumn += n; m_ptr += n; if( m_ptr < m_endPtr ) m_currentChar = *m_ptr; else m_currentChar = TQChar::null; } inline void Lexer::readIdentifier() { while( currentChar().isLetterOrNumber() || currentChar() == '_' ) nextChar(); } inline void Lexer::readWhiteSpaces( bool skipNewLine, bool skipOnlyOnce ) { while( !currentChar().isNull() ){ TQChar ch = currentChar(); if( ch == '\n' && !skipNewLine ){ break; } else if( ch.isSpace() ){ nextChar(); } else if( m_inPreproc && currentChar() == '\\' ){ nextChar(); readWhiteSpaces( true, true ); } else { break; } if( skipOnlyOnce && ch == '\n' ) { skipNewLine = false; } } } //little hack for better performance inline bool isTodo( const TQString& txt, int position ) { if( txt.length() < position + 4 ) return false; return (txt[ position ] == 't' || txt[ position ] == 'T') && (txt[ position+1 ] == 'o' || txt[ position+1 ] == 'O') && (txt[ position+2 ] == 'd' || txt[ position+2 ] == 'D') && (txt[ position+3 ] == 'o' || txt[ position+3 ] == 'O'); } inline bool isFixme( const TQString& txt, int position ) { if( txt.length() < position + 5 ) return false; return (txt[ position ] == 'f' || txt[ position ] == 'F') && (txt[ position+1 ] == 'i' || txt[ position+1 ] == 'I') && (txt[ position+2 ] == 'x' || txt[ position+2 ] == 'X') && (txt[ position+3 ] == 'm' || txt[ position+3 ] == 'M') && (txt[ position+4 ] == 'e' || txt[ position+4 ] == 'E'); } inline void Lexer::readLineComment() { while( !currentChar().isNull() && currentChar() != '\n' ){ if( m_reportMessages && isTodo( m_source, currentPosition() ) ){ nextChar( 4 ); TQString msg; int line = m_currentLine; int col = m_currentColumn; while( currentChar() ){ if( currentChar() == '*' && peekChar() == '/' ) break; else if( currentChar() == '\n' ) break; msg += currentChar(); nextChar(); } m_driver->addProblem( m_driver->currentFileName(), Problem(msg, line, col, Problem::Level_Todo) ); } else if( m_reportMessages && isFixme( m_source, currentPosition() ) ){ nextChar( 5 ); TQString msg; int line = m_currentLine; int col = m_currentColumn; while( currentChar() ){ if( currentChar() == '*' && peekChar() == '/' ) break; else if( currentChar() == '\n' ) break; msg += currentChar(); nextChar(); } m_driver->addProblem( m_driver->currentFileName(), Problem(msg, line, col, Problem::Level_Fixme) ); } else nextChar(); } } inline void Lexer::readMultiLineComment() { while( !currentChar().isNull() ){ if( currentChar() == '*' && peekChar() == '/' ){ nextChar( 2 ); return; } else if( m_reportMessages && isTodo( m_source, currentPosition() ) ){ nextChar( 4 ); TQString msg; int line = m_currentLine; int col = m_currentColumn; while( currentChar() ){ if( currentChar() == '*' && peekChar() == '/' ) break; else if( currentChar() == '\n' ) break; msg += currentChar(); nextChar(); } m_driver->addProblem( m_driver->currentFileName(), Problem(msg, line, col, Problem::Level_Todo) ); } else if( m_reportMessages && isFixme( m_source, currentPosition() ) ) { nextChar( 5 ); TQString msg; int line = m_currentLine; int col = m_currentColumn; while( currentChar() ){ if( currentChar() == '*' && peekChar() == '/' ) break; else if( currentChar() == '\n' ) break; msg += currentChar(); nextChar(); } m_driver->addProblem( m_driver->currentFileName(), Problem(msg, line, col, Problem::Level_Fixme) ); } else nextChar(); } } inline void Lexer::readCharLiteral() { if( currentChar() == '\'' ) nextChar(); // skip ' else if( currentChar() == 'L' && peekChar() == '\'' ) nextChar( 2 ); // slip L' else return; while( !currentChar().isNull() ){ int len = getOffset( m_endPtr ) - currentPosition(); if( len>=2 && (currentChar() == '\\' && peekChar() == '\'') ){ nextChar( 2 ); } else if( len>=2 && (currentChar() == '\\' && peekChar() == '\\') ){ nextChar( 2 ); } else if( currentChar() == '\'' ){ nextChar(); break; } else { nextChar(); } } } inline void Lexer::readStringLiteral() { if( currentChar() != '"' ) return; nextChar(); // skip " while( !currentChar().isNull() ){ int len = getOffset( m_endPtr ) - currentPosition(); if( len>=2 && currentChar() == '\\' && peekChar() == '"' ){ nextChar( 2 ); } else if( len>=2 && currentChar() == '\\' && peekChar() == '\\' ){ nextChar( 2 ); } else if( currentChar() == '"' ){ nextChar(); break; } else { nextChar(); } } } inline void Lexer::readNumberLiteral() { while( currentChar().isLetterOrNumber() || currentChar() == '.' ) nextChar(); } inline int Lexer::findOperator3() const { int n = getOffset( m_endPtr ) - currentPosition(); if( n >= 3){ TQChar ch = currentChar(), ch1=peekChar(), ch2=peekChar(2); if( ch == '<' && ch1 == '<' && ch2 == '=' ) return Token_assign; else if( ch == '>' && ch1 == '>' && ch2 == '=' ) return Token_assign; else if( ch == '-' && ch1 == '>' && ch2 == '*' ) return Token_ptrmem; else if( ch == '.' && ch1 == '.' && ch2 == '.' ) return Token_ellipsis; } return -1; } inline int Lexer::findOperator2() const { int n = getOffset( m_endPtr ) - currentPosition(); if( n>=2 ){ TQChar ch = currentChar(), ch1=peekChar(); if( ch == ':' && ch1 == ':' ) return Token_scope; else if( ch == '.' && ch1 == '*' ) return Token_ptrmem; else if( ch == '+' && ch1 == '=' ) return Token_assign; else if( ch == '-' && ch1 == '=' ) return Token_assign; else if( ch == '*' && ch1 == '=' ) return Token_assign; else if( ch == '/' && ch1 == '=' ) return Token_assign; else if( ch == '%' && ch1 == '=' ) return Token_assign; else if( ch == '^' && ch1 == '=' ) return Token_assign; else if( ch == '&' && ch1 == '=' ) return Token_assign; else if( ch == '|' && ch1 == '=' ) return Token_assign; else if( ch == '<' && ch1 == '<' ) return Token_shift; else if( ch == '>' && ch1 == '>' ) return Token_shift; else if( ch == '=' && ch1 == '=' ) return Token_eq; else if( ch == '!' && ch1 == '=' ) return Token_eq; else if( ch == '<' && ch1 == '=' ) return Token_leq; else if( ch == '>' && ch1 == '=' ) return Token_geq; else if( ch == '&' && ch1 == '&' ) return Token_and; else if( ch == '|' && ch1 == '|' ) return Token_or; else if( ch == '+' && ch1 == '+' ) return Token_incr; else if( ch == '-' && ch1 == '-' ) return Token_decr; else if( ch == '-' && ch1 == '>' ) return Token_arrow; else if( ch == '#' && ch1 == '#' ) return Token_concat; } return -1; } inline bool Lexer::skipWordsEnabled() const { return m_skipWordsEnabled; } inline void Lexer::setSkipWordsEnabled( bool enabled ) { m_skipWordsEnabled = enabled; } inline bool Lexer::preprocessorEnabled() const { return m_preprocessorEnabled; } inline void Lexer::setPreprocessorEnabled( bool enabled ) { m_preprocessorEnabled = enabled; } inline int Lexer::currentPosition() const { return getOffset( m_ptr ); } inline const TQChar Lexer::currentChar() const { return m_currentChar; } inline TQChar Lexer::peekChar( int n ) const { const TQChar* p = m_ptr + n; if( p < m_endPtr ) return *p; else return TQChar::null; } inline bool Lexer::eof() const { return m_ptr >= m_endPtr; } inline bool Lexer::reportWarnings() const { return m_reportWarnings; } inline void Lexer::setReportWarnings( bool enable ) { m_reportWarnings = enable; } inline bool Lexer::reportMessages() const { return m_reportMessages; } inline void Lexer::setReportMessages( bool enable ) { m_reportMessages = enable; } inline void Lexer::insertCurrent( const TQString& str ) { int posi = currentPosition(); m_source.insert( posi, str ); m_ptr = offset( posi ); m_endPtr = offset( m_source.length() ); if( m_ptr < m_endPtr ) m_currentChar = *m_ptr; else m_currentChar = TQChar::null; } #endif