You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
291 lines
6.8 KiB
291 lines
6.8 KiB
/***************************************************************************
|
|
* Copyright (C) 2004 by Roberto Raggi *
|
|
* roberto@kdevelop.org *
|
|
* *
|
|
* This program is free software; you can redistribute it and/or modify *
|
|
* it under the terms of the GNU Library General Public License as *
|
|
* published by the Free Software Foundation; either version 2 of the *
|
|
* License, or (at your option) any later version. *
|
|
* *
|
|
* This program is distributed in the hope that it will be useful, *
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
|
|
* GNU General Public License for more details. *
|
|
* *
|
|
* You should have received a copy of the GNU Library General Public *
|
|
* License along with this program; if not, write to the *
|
|
* Free Software Foundation, Inc., *
|
|
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
|
|
***************************************************************************/
|
|
|
|
#include "milexer.h"
|
|
#include "tokens.h"
|
|
#include <cctype>
|
|
#include <iostream>
|
|
|
|
// Becomes true once setupScanTable() has run; the constructor checks it so
// the shared scan table is only filled when it has not been set up yet.
bool MILexer::s_initialized = false;
|
|
// Dispatch table filled by setupScanTable(): slots 0..127 are indexed by the
// character code, slot 128 is used by nextToken() for every byte above 127.
scan_fun_ptr MILexer::s_scan_table[];
|
|
|
|
|
|
/**
 * Constructs a lexer.
 *
 * The static scan table is filled on demand: if no earlier call has marked
 * it as initialized, setupScanTable() is run now.
 */
MILexer::MILexer()
{
    const bool tableReady = s_initialized;
    if (tableReady)
        return;

    setupScanTable();
}
|
|
|
|
/** Destructor; the body performs no work of its own. */
MILexer::~MILexer()
{
    // nothing to do
}
|
|
|
|
void MILexer::setupScanTable()
|
|
{
|
|
s_initialized = true;
|
|
|
|
for (int i=0; i<128; ++i) {
|
|
switch (i) {
|
|
case '\n':
|
|
s_scan_table[i] = &MILexer::scanNewline;
|
|
break;
|
|
|
|
case '"':
|
|
s_scan_table[i] = &MILexer::scanStringLiteral;
|
|
break;
|
|
|
|
default:
|
|
if (isspace(i))
|
|
s_scan_table[i] = &MILexer::scanWhiteSpaces;
|
|
else if (isalpha(i) || i == '_')
|
|
s_scan_table[i] = &MILexer::scanIdentifier;
|
|
else if (isdigit(i))
|
|
s_scan_table[i] = &MILexer::scanNumberLiteral;
|
|
else
|
|
s_scan_table[i] = &MILexer::scanChar;
|
|
}
|
|
}
|
|
|
|
s_scan_table[128] = &MILexer::scanUnicodeChar;
|
|
}
|
|
|
|
/*
|
|
|
|
m_firstToken = m_tokens.data();
|
|
m_currentToken = 0;
|
|
|
|
m_firstToken = m_tokens.data();
|
|
m_currentToken = m_firstToken;
|
|
*/
|
|
|
|
/**
 * Tokenizes the contents of @p fileSymbol and returns them as a TokenStream.
 *
 * The lexer state (token buffer, line table, read position, cursor) is reset,
 * then nextToken() is called repeatedly until it reports kind 0. That
 * terminating kind-0 token is stored as the last token. Finally the
 * contents, line table, tokens and cursor are copied into a new TokenStream.
 *
 * @param fileSymbol  Source to tokenize; its @c contents member is read.
 *                    It is dereferenced unconditionally, so it must not be null.
 * @return A TokenStream allocated with @c new.
 *         NOTE(review): ownership presumably passes to the caller - confirm.
 */
TokenStream *MILexer::tokenize(const FileSymbol *fileSymbol)
{
    m_tokensCount = 0;
    m_tokens.resize(64);

    m_contents = fileSymbol->contents;
    m_length = m_contents.length();
    m_ptr = 0;

    m_lines.resize(8);
    m_line = 0;

    // The first line starts at offset 0.
    m_lines[m_line++] = 0;

    m_cursor = 0;

    // tokenize
    for (;;) {
        // Grow the token buffer geometrically when it is full.
        if (m_tokensCount == static_cast<int>(m_tokens.size()))
            m_tokens.resize(m_tokensCount * 2);

        // nextToken() may return 0 at end of input without writing to its
        // output arguments, so give them defined values for every token
        // instead of reading indeterminate (or stale) ones below.
        int pos = m_length;
        int len = 0;

        Token &tk = m_tokens[m_tokensCount++];
        tk.kind = nextToken(pos, len);
        tk.position = pos;
        tk.length = len;

        if (tk.kind == 0)
            break;
    }

    TokenStream *tokenStream = new TokenStream;
    tokenStream->m_contents = m_contents;

    tokenStream->m_lines = m_lines;
    tokenStream->m_line = m_line;

    tokenStream->m_tokens = m_tokens;
    tokenStream->m_tokensCount = m_tokensCount;

    // Point the stream's token cursors at its own copy of the token data.
    tokenStream->m_firstToken = tokenStream->m_tokens.data();
    tokenStream->m_currentToken = tokenStream->m_firstToken;

    tokenStream->m_cursor = m_cursor;

    return tokenStream;
}
|
|
|
|
int MILexer::nextToken(int &pos, int &len)
|
|
{
|
|
int start = 0;
|
|
int kind = 0;
|
|
unsigned char ch = 0;
|
|
|
|
while (m_ptr < m_length) {
|
|
start = m_ptr;
|
|
|
|
ch = (unsigned char)m_contents[m_ptr];
|
|
(this->*s_scan_table[ch < 128 ? ch : 128])(&kind);
|
|
|
|
switch (kind) {
|
|
case Token_whitespaces:
|
|
case '\n':
|
|
break;
|
|
|
|
default:
|
|
pos = start;
|
|
len = m_ptr - start;
|
|
return kind;
|
|
}
|
|
|
|
if (kind == 0)
|
|
break;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
void MILexer::scanChar(int *kind)
|
|
{
|
|
*kind = m_contents[m_ptr++];
|
|
}
|
|
|
|
void MILexer::scanWhiteSpaces(int *kind)
|
|
{
|
|
*kind = Token_whitespaces;
|
|
|
|
char ch;
|
|
while (m_ptr < m_length) {
|
|
ch = m_contents[m_ptr];
|
|
if (!(isspace(ch) && ch != '\n'))
|
|
break;
|
|
|
|
++m_ptr;
|
|
}
|
|
}
|
|
|
|
void MILexer::scanNewline(int *kind)
|
|
{
|
|
if (m_line == (int)m_lines.size())
|
|
m_lines.resize(m_lines.size() * 2);
|
|
|
|
if (m_lines.at(m_line) < m_ptr)
|
|
m_lines[m_line++] = m_ptr;
|
|
|
|
*kind = m_contents[m_ptr++];
|
|
}
|
|
|
|
void MILexer::scanUnicodeChar(int *kind)
|
|
{
|
|
*kind = m_contents[m_ptr++];
|
|
}
|
|
|
|
void MILexer::scanStringLiteral(int *kind)
|
|
{
|
|
++m_ptr;
|
|
while (char c = m_contents[m_ptr]) {
|
|
switch (c) {
|
|
case '\n':
|
|
// ### error
|
|
*kind = Token_string_literal;
|
|
return;
|
|
case '\\':
|
|
{
|
|
char next = m_contents.at(m_ptr+1);
|
|
if (next == '"' || next == '\\')
|
|
m_ptr += 2;
|
|
else
|
|
++m_ptr;
|
|
}
|
|
break;
|
|
case '"':
|
|
++m_ptr;
|
|
*kind = Token_string_literal;
|
|
return;
|
|
default:
|
|
++m_ptr;
|
|
break;
|
|
}
|
|
}
|
|
|
|
// ### error
|
|
*kind = Token_string_literal;
|
|
}
|
|
|
|
void MILexer::scanIdentifier(int *kind)
|
|
{
|
|
char ch;
|
|
while (m_ptr < m_length) {
|
|
ch = m_contents[m_ptr];
|
|
if (!(isalnum(ch) || ch == '-' || ch == '_'))
|
|
break;
|
|
|
|
++m_ptr;
|
|
}
|
|
|
|
*kind = Token_identifier;
|
|
}
|
|
|
|
void MILexer::scanNumberLiteral(int *kind)
|
|
{
|
|
char ch;
|
|
while (m_ptr < m_length) {
|
|
ch = m_contents[m_ptr];
|
|
if (!(isalnum(ch) || ch == '.'))
|
|
break;
|
|
|
|
++m_ptr;
|
|
}
|
|
|
|
// ### finish to implement me!!
|
|
*kind = Token_number_literal;
|
|
}
|
|
|
|
void TokenStream::positionAt(int position, int *line, int *column) const
|
|
{
|
|
if (!(line && column && !m_lines.isEmpty()))
|
|
return;
|
|
|
|
int first = 0;
|
|
int len = m_line;
|
|
int half;
|
|
int middle;
|
|
|
|
while (len > 0) {
|
|
half = len >> 1;
|
|
middle = first;
|
|
|
|
middle += half;
|
|
|
|
if (m_lines[middle] < position) {
|
|
first = middle;
|
|
++first;
|
|
len = len - half - 1;
|
|
}
|
|
else
|
|
len = half;
|
|
}
|
|
|
|
*line = TQMAX(first - 1, 0);
|
|
*column = position - m_lines.at(*line);
|
|
|
|
Q_ASSERT( *column >= 0 );
|
|
}
|
|
|
|
/**
 * Returns the text of a token.
 *
 * @param index  Token index counted from the first token; a negative value
 *               selects the current token instead.
 * @return A TQCString built from the token's position and length.
 */
TQCString TokenStream::tokenText(int index) const
{
    Token *token = m_currentToken;
    if (index >= 0)
        token = m_firstToken + index;

    const char *text = m_contents;

    // NOTE(review): the +1 presumably accounts for the terminating zero in
    // the TQCString(const char *, uint) size argument - confirm.
    return TQCString(text + token->position, token->length + 1);
}
|
|
|