// tdevelop/languages/cpp/debugger/mi/milexer.cpp

/***************************************************************************
 *   Copyright (C) 2004 by Roberto Raggi                                   *
 *   roberto@kdevelop.org                                                  *
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU Library General Public License as       *
 *   published by the Free Software Foundation; either version 2 of the    *
 *   License, or (at your option) any later version.                       *
 *                                                                         *
 *   This program is distributed in the hope that it will be useful,       *
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
 *   GNU General Public License for more details.                          *
 *                                                                         *
 *   You should have received a copy of the GNU Library General Public     *
 *   License along with this program; if not, write to the                 *
 *   Free Software Foundation, Inc.,                                       *
 *   51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.             *
 ***************************************************************************/

#include "milexer.h"
#include "tokens.h"
#include <cctype>
#include <iostream>

// Becomes true once setupScanTable() has run; the constructor checks it so
// the table is only filled the first time a lexer is created.
bool MILexer::s_initialized = false;
// Dispatch table of scanner member functions, indexed by the first byte of a
// token: slots 0-127 are filled per ASCII code, slot 128 is used by
// nextToken() for every byte >= 128.
scan_fun_ptr MILexer::s_scan_table[];


// Constructs a lexer; the first construction also fills the shared scan table.
MILexer::MILexer()
{
    if (s_initialized)
        return;

    setupScanTable();
}

// Nothing to release explicitly here.
MILexer::~MILexer() {}

void MILexer::setupScanTable()
{
    s_initialized = true;

    for (int i=0; i<128; ++i) {
        switch (i) {
        case '\n':
            s_scan_table[i] = &MILexer::scanNewline;
            break;

        case '"':
            s_scan_table[i] = &MILexer::scanStringLiteral;
            break;

        default:
            if (isspace(i))
                s_scan_table[i] = &MILexer::scanWhiteSpaces;
            else if (isalpha(i) || i == '_')
                s_scan_table[i] = &MILexer::scanIdentifier;
            else if (isdigit(i))
                s_scan_table[i] = &MILexer::scanNumberLiteral;
            else
                s_scan_table[i] = &MILexer::scanChar;
        }
    }

    s_scan_table[128] = &MILexer::scanUnicodeChar;
}

/*

    m_firstToken = m_tokens.data();
    m_currentToken = 0;

    m_firstToken = m_tokens.data();
    m_currentToken = m_firstToken;
 */

// Tokenizes fileSymbol->contents and returns the result as a TokenStream
// allocated with `new`. Nothing in this function frees the stream, so the
// caller is presumably expected to delete it.
//
// The lexer's own state (contents, token array, line table, cursor) is reset
// at the start of every call. The returned stream always ends with one token
// whose kind is 0, marking the end of input.
//
// fileSymbol is dereferenced unconditionally and must not be null.
TokenStream *MILexer::tokenize(const FileSymbol *fileSymbol)
{
    m_tokensCount = 0;
    m_tokens.resize(64);

    m_contents = fileSymbol->contents;
    m_length = m_contents.length();
    m_ptr = 0;

    m_lines.resize(8);
    m_line = 0;

    // The first line starts at offset 0.
    m_lines[m_line++] = 0;

    m_cursor = 0;

    // nextToken() leaves pos/len untouched when it reports end of input.
    // Initialise them so the terminating token never copies indeterminate
    // values (this happens when the input contains no tokens at all).
    int pos = 0;
    int len = 0;

    for (;;) {
        // Grow the token array geometrically once it is full.
        if (m_tokensCount == (int)m_tokens.size())
            m_tokens.resize(m_tokensCount * 2);

        Token &tk = m_tokens[m_tokensCount++];
        tk.kind = nextToken(pos, len);
        tk.position = pos;
        tk.length = len;

        // Kind 0 is the end-of-input marker; it is stored, then we stop.
        if (tk.kind == 0)
            break;
    }

    TokenStream *tokenStream = new TokenStream;
    tokenStream->m_contents = m_contents;

    tokenStream->m_lines = m_lines;
    tokenStream->m_line = m_line;

    // NOTE(review): if m_tokens/m_lines are explicitly shared containers
    // (e.g. Qt3 QMemArray), these assignments share storage with the lexer
    // and a later tokenize() call could affect this stream -- confirm.
    tokenStream->m_tokens = m_tokens;
    tokenStream->m_tokensCount = m_tokensCount;

    tokenStream->m_firstToken = tokenStream->m_tokens.data();
    tokenStream->m_currentToken = tokenStream->m_firstToken;

    tokenStream->m_cursor = m_cursor;

    return tokenStream;
}

// Scans forward from m_ptr and returns the kind of the next token that is
// neither whitespace nor a newline, storing its start offset in pos and its
// byte length in len. Returns 0 when the input is exhausted; pos and len are
// not written in that case.
int MILexer::nextToken(int &pos, int &len)
{
    int kind = 0;

    while (m_ptr < m_length) {
        const int tokenStart = m_ptr;

        // Bytes outside the ASCII range all share table slot 128.
        const unsigned char lead = (unsigned char)m_contents[m_ptr];
        const int slot = lead < 128 ? lead : 128;
        (this->*s_scan_table[slot])(&kind);

        const bool skippable = (kind == Token_whitespaces || kind == '\n');
        if (!skippable) {
            pos = tokenStart;
            len = m_ptr - tokenStart;
            return kind;
        }

        if (kind == 0)
            break;
    }

    return 0;
}

// Consumes one character and reports that character's value as the token kind.
void MILexer::scanChar(int *kind)
{
    const int symbol = m_contents[m_ptr];
    ++m_ptr;
    *kind = symbol;
}

// Consumes a run of whitespace characters, stopping before any '\n' (newlines
// are handled by scanNewline()) or at the end of the input. Always reports
// Token_whitespaces.
void MILexer::scanWhiteSpaces(int *kind)
{
    *kind = Token_whitespaces;

    while (m_ptr < m_length) {
        // <cctype> functions need a value representable as unsigned char;
        // passing a negative plain char is undefined behaviour.
        const unsigned char ch = static_cast<unsigned char>(m_contents[m_ptr]);
        if (!(isspace(ch) && ch != '\n'))
            break;

        ++m_ptr;
    }
}

// Consumes one newline character, records its offset in the line table and
// reports the character itself as the token kind.
//
// An offset is only appended when it is greater than the last recorded one,
// which keeps m_lines strictly increasing.
void MILexer::scanNewline(int *kind)
{
    // Grow the line table geometrically once it is full.
    if (m_line == (int)m_lines.size())
        m_lines.resize(m_lines.size() * 2);

    // Compare against the last *recorded* entry (m_line - 1). Slot m_line has
    // not been written yet, so reading it would use an indeterminate value.
    if (m_line == 0 || m_lines.at(m_line - 1) < m_ptr)
        m_lines[m_line++] = m_ptr;

    *kind = m_contents[m_ptr++];
}

// Consumes a single byte and reports its value as the token kind. nextToken()
// dispatches here for every byte >= 128.
// NOTE(review): if the content's element type is a signed char, the reported
// kind is negative for these bytes -- confirm that callers expect this.
void MILexer::scanUnicodeChar(int *kind)
{
    const int byteValue = m_contents[m_ptr];
    ++m_ptr;
    *kind = byteValue;
}

// Scans a double-quoted string literal starting at the opening quote.
//
// Scanning stops after the closing quote, or -- without consuming it -- at a
// newline or at a NUL character. A backslash followed by '"' or '\\' is
// skipped as a pair; any other backslash is skipped on its own. Every exit
// path reports Token_string_literal, including the unterminated cases.
void MILexer::scanStringLiteral(int *kind)
{
    // Every exit path below yields the same token kind.
    *kind = Token_string_literal;

    // Step over the opening quote.
    ++m_ptr;

    for (;;) {
        const char current = m_contents[m_ptr];

        // ### error: the input ended before the closing quote
        if (current == '\0')
            return;

        // ### error: a newline inside the literal; it is left unconsumed
        if (current == '\n')
            return;

        if (current == '"') {
            ++m_ptr;
            return;
        }

        if (current == '\\') {
            const char escaped = m_contents.at(m_ptr+1);
            const bool isPair = (escaped == '"' || escaped == '\\');
            m_ptr += isPair ? 2 : 1;
            continue;
        }

        ++m_ptr;
    }
}

// Consumes a run of alphanumeric characters, '-' and '_' and reports
// Token_identifier. Stops at the first other character or at end of input.
void MILexer::scanIdentifier(int *kind)
{
    while (m_ptr < m_length) {
        // <cctype> functions need a value representable as unsigned char;
        // passing a negative plain char is undefined behaviour.
        const unsigned char ch = static_cast<unsigned char>(m_contents[m_ptr]);
        if (!(isalnum(ch) || ch == '-' || ch == '_'))
            break;

        ++m_ptr;
    }

    *kind = Token_identifier;
}

// Consumes a run of alphanumeric characters and '.' and reports
// Token_number_literal. The run is not checked for being a well-formed
// number: letters are accepted anywhere in it.
void MILexer::scanNumberLiteral(int *kind)
{
    while (m_ptr < m_length) {
        // <cctype> functions need a value representable as unsigned char;
        // passing a negative plain char is undefined behaviour.
        const unsigned char ch = static_cast<unsigned char>(m_contents[m_ptr]);
        if (!(isalnum(ch) || ch == '.'))
            break;

        ++m_ptr;
    }

    // ### finish to implement me!!
    *kind = Token_number_literal;
}

// Converts a byte offset into a line index and a column, using the recorded
// line offsets in m_lines (the first m_line entries).
//
// Does nothing if either output pointer is null or the line table is empty.
// Otherwise *line is set to the index just before the first recorded offset
// that is >= position (clamped to 0), and *column to the distance from that
// line's recorded offset.
void TokenStream::positionAt(int position, int *line, int *column) const
{
    if (!line || !column || m_lines.isEmpty())
        return;

    // Binary search for the first entry that is not less than position.
    int lower = 0;
    int remaining = m_line;

    while (remaining > 0) {
        const int step = remaining >> 1;
        const int probe = lower + step;

        if (m_lines[probe] < position) {
            lower = probe + 1;
            remaining -= step + 1;
        } else {
            remaining = step;
        }
    }

    *line = QMAX(lower - 1, 0);
    *column = position - m_lines.at(*line);

    Q_ASSERT( *column >= 0 );
}

// Returns the text of a token as a QCString. A negative index selects the
// current token; otherwise the token at that offset from the first token is
// used. The index is not range-checked.
QCString TokenStream::tokenText(int index) const
{
    Token *selected = m_firstToken + index;
    if (index < 0)
        selected = m_currentToken;

    const char *text = m_contents;
    const char *begin = text + selected->position;
    return QCString(begin, selected->length + 1);
}
Copy the KDE 3.5 branch to branches/trinity for new KDE 3.5 features. BUG:215923 git-svn-id: svn://anonsvn.kde.org/home/kde/branches/trinity/kdevelop@1054174 283d02a7-25f6-0310-bc7c-ecb5cbfe19da 15 years ago			`/***************************************************************************`
			`* Copyright (C) 2004 by Roberto Raggi *`
			`* roberto@kdevelop.org *`
			`* *`
			`* This program is free software; you can redistribute it and/or modify *`
			`* it under the terms of the GNU Library General Public License as *`
			`* published by the Free Software Foundation; either version 2 of the *`
			`* License, or (at your option) any later version. *`
			`* *`
			`* This program is distributed in the hope that it will be useful, *`
			`* but WITHOUT ANY WARRANTY; without even the implied warranty of *`
			`* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *`
			`* GNU General Public License for more details. *`
			`* *`
			`* You should have received a copy of the GNU Library General Public *`
			`* License along with this program; if not, write to the *`
			`* Free Software Foundation, Inc., *`
Fixed remaining GPL address zip codes git-svn-id: svn://anonsvn.kde.org/home/kde/branches/trinity/kdevelop@1070598 283d02a7-25f6-0310-bc7c-ecb5cbfe19da 15 years ago			`* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *`
Copy the KDE 3.5 branch to branches/trinity for new KDE 3.5 features. BUG:215923 git-svn-id: svn://anonsvn.kde.org/home/kde/branches/trinity/kdevelop@1054174 283d02a7-25f6-0310-bc7c-ecb5cbfe19da 15 years ago			`***************************************************************************/`

			`#include "milexer.h"`
			`#include "tokens.h"`
			`#include <cctype>`
			`#include <iostream>`

			`bool MILexer::s_initialized = false;`
			`scan_fun_ptr MILexer::s_scan_table[];`


			`MILexer::MILexer()`
			`{`
			`if (!s_initialized)`
			`setupScanTable();`
			`}`

			`MILexer::~MILexer()`
			`{`
			`}`

			`void MILexer::setupScanTable()`
			`{`
			`s_initialized = true;`

			`for (int i=0; i<128; ++i) {`
			`switch (i) {`
			`case '\n':`
			`s_scan_table[i] = &MILexer::scanNewline;`
			`break;`

			`case '"':`
			`s_scan_table[i] = &MILexer::scanStringLiteral;`
			`break;`

			`default:`
			`if (isspace(i))`
			`s_scan_table[i] = &MILexer::scanWhiteSpaces;`
			`else if (isalpha(i) \|\| i == '_')`
			`s_scan_table[i] = &MILexer::scanIdentifier;`
			`else if (isdigit(i))`
			`s_scan_table[i] = &MILexer::scanNumberLiteral;`
			`else`
			`s_scan_table[i] = &MILexer::scanChar;`
			`}`
			`}`

			`s_scan_table[128] = &MILexer::scanUnicodeChar;`
			`}`

			`/*`

			`m_firstToken = m_tokens.data();`
			`m_currentToken = 0;`

			`m_firstToken = m_tokens.data();`
			`m_currentToken = m_firstToken;`
			`*/`

			`TokenStream MILexer::tokenize(const FileSymbol fileSymbol)`
			`{`
			`m_tokensCount = 0;`
			`m_tokens.resize(64);`

			`m_contents = fileSymbol->contents;`
			`m_length = m_contents.length();`
			`m_ptr = 0;`

			`m_lines.resize(8);`
			`m_line = 0;`

			`m_lines[m_line++] = 0;`

			`m_cursor = 0;`

			`// tokenize`
			`int pos, len;`

			`for (;;) {`
			`if (m_tokensCount == (int)m_tokens.size())`
			`m_tokens.resize(m_tokensCount * 2);`

			`Token &tk = m_tokens[m_tokensCount++];`
			`tk.kind = nextToken(pos, len);`
			`tk.position = pos;`
			`tk.length = len;`

			`if (tk.kind == 0)`
			`break;`
			`}`

			`TokenStream *tokenStream = new TokenStream;`
			`tokenStream->m_contents = m_contents;`

			`tokenStream->m_lines = m_lines;`
			`tokenStream->m_line = m_line;`

			`tokenStream->m_tokens = m_tokens;`
			`tokenStream->m_tokensCount = m_tokensCount;`

			`tokenStream->m_firstToken = tokenStream->m_tokens.data();`
			`tokenStream->m_currentToken = tokenStream->m_firstToken;;`

			`tokenStream->m_cursor = m_cursor;`

			`return tokenStream;`
			`}`

			`int MILexer::nextToken(int &pos, int &len)`
			`{`
			`int start = 0;`
			`int kind = 0;`
			`unsigned char ch = 0;`

			`while (m_ptr < m_length) {`
			`start = m_ptr;`

			`ch = (unsigned char)m_contents[m_ptr];`
			`(this->*s_scan_table[ch < 128 ? ch : 128])(&kind);`

			`switch (kind) {`
			`case Token_whitespaces:`
			`case '\n':`
			`break;`

			`default:`
			`pos = start;`
			`len = m_ptr - start;`
			`return kind;`
			`}`

			`if (kind == 0)`
			`break;`
			`}`

			`return 0;`
			`}`

			`void MILexer::scanChar(int *kind)`
			`{`
			`*kind = m_contents[m_ptr++];`
			`}`

			`void MILexer::scanWhiteSpaces(int *kind)`
			`{`
			`*kind = Token_whitespaces;`

			`char ch;`
			`while (m_ptr < m_length) {`
			`ch = m_contents[m_ptr];`
			`if (!(isspace(ch) && ch != '\n'))`
			`break;`

			`++m_ptr;`
			`}`
			`}`

			`void MILexer::scanNewline(int *kind)`
			`{`
			`if (m_line == (int)m_lines.size())`
			`m_lines.resize(m_lines.size() * 2);`

			`if (m_lines.at(m_line) < m_ptr)`
			`m_lines[m_line++] = m_ptr;`

			`*kind = m_contents[m_ptr++];`
			`}`

			`void MILexer::scanUnicodeChar(int *kind)`
			`{`
			`*kind = m_contents[m_ptr++];`
			`}`

			`void MILexer::scanStringLiteral(int *kind)`
			`{`
			`++m_ptr;`
			`while (char c = m_contents[m_ptr]) {`
			`switch (c) {`
			`case '\n':`
			`// ### error`
			`*kind = Token_string_literal;`
			`return;`
			`case '\\':`
			`{`
			`char next = m_contents.at(m_ptr+1);`
			`if (next == '"' \|\| next == '\\')`
			`m_ptr += 2;`
			`else`
			`++m_ptr;`
			`}`
			`break;`
			`case '"':`
			`++m_ptr;`
			`*kind = Token_string_literal;`
			`return;`
			`default:`
			`++m_ptr;`
			`break;`
			`}`
			`}`

			`// ### error`
			`*kind = Token_string_literal;`
			`}`

			`void MILexer::scanIdentifier(int *kind)`
			`{`
			`char ch;`
			`while (m_ptr < m_length) {`
			`ch = m_contents[m_ptr];`
			`if (!(isalnum(ch) \|\| ch == '-' \|\| ch == '_'))`
			`break;`

			`++m_ptr;`
			`}`

			`*kind = Token_identifier;`
			`}`

			`void MILexer::scanNumberLiteral(int *kind)`
			`{`
			`char ch;`
			`while (m_ptr < m_length) {`
			`ch = m_contents[m_ptr];`
			`if (!(isalnum(ch) \|\| ch == '.'))`
			`break;`

			`++m_ptr;`
			`}`

			`// ### finish to implement me!!`
			`*kind = Token_number_literal;`
			`}`

			`void TokenStream::positionAt(int position, int line, int column) const`
			`{`
			`if (!(line && column && !m_lines.isEmpty()))`
			`return;`

			`int first = 0;`
			`int len = m_line;`
			`int half;`
			`int middle;`

			`while (len > 0) {`
			`half = len >> 1;`
			`middle = first;`

			`middle += half;`

			`if (m_lines[middle] < position) {`
			`first = middle;`
			`++first;`
			`len = len - half - 1;`
			`}`
			`else`
			`len = half;`
			`}`

			`*line = QMAX(first - 1, 0);`
			`column = position - m_lines.at(line);`

			`Q_ASSERT( *column >= 0 );`
			`}`

			`QCString TokenStream::tokenText(int index) const`
			`{`
			`Token *t = index < 0 ? m_currentToken : m_firstToken + index;`
			`const char* data = m_contents;`
			`return QCString(data + t->position, t->length+1);`
			`}`