You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
tdesdk/poxml/antlr/src/CharScanner.cpp

431 lines
11 KiB

/**
* <b>SOFTWARE RIGHTS</b>
* <p>
* ANTLR 2.6.0 MageLang Insitute, 1998
* <p>
* We reserve no legal rights to the ANTLR--it is fully in the
* public domain. An individual or company may do whatever
* they wish with source code distributed with ANTLR or the
* code generated by ANTLR, including the incorporation of
* ANTLR, or its output, into commerical software.
* <p>
* We encourage users to develop software with ANTLR. However,
* we do ask that credit is given to us for developing
* ANTLR. By "credit", we mean that if you use ANTLR or
* incorporate any source code into one of your programs
* (commercial product, research project, or otherwise) that
* you acknowledge this fact somewhere in the documentation,
* research report, etc... If you like ANTLR and have
* developed a nice tool with the output, please mention that
* you developed it using ANTLR. In addition, we ask that the
* headers remain intact in our source code. As long as these
* guidelines are kept, we expect to continue enhancing this
* system and expect to make other tools available as they are
* completed.
* <p>
* The ANTLR gang:
* @version ANTLR 2.6.0 MageLang Insitute, 1998
* @author Terence Parr, <a href=http://www.MageLang.com>MageLang Institute</a>
* @author <br>John Lilley, <a href=http://www.Empathy.com>Empathy Software</a>
* @author <br><a href="mailto:pete@yamuna.demon.co.uk">Pete Wells</a>
*/
#include "antlr/CharScanner.h"
#include "antlr/CommonToken.h"
#include "antlr/MismatchedCharException.h"
#include <map>
#ifdef HAS_NOT_CCTYPE_H
#include <ctype.h>
#else
#include <cctype>
#endif
#include <iostream>
#ifdef HAS_NOT_CSTRING_H
#include <string>
#else
#include <cstring>
#endif
#include <stdlib.h>
ANTLR_BEGIN_NAMESPACE(antlr)
ANTLR_C_USING(exit)
ANTLR_C_USING(tolower)
#ifdef ANTLR_REALLY_NO_STRCASECMP
// Apparently, neither strcasecmp nor stricmp is standard, and Codewarrior
// on the mac has neither...
inline int strcasecmp(const char *s1, const char *s2)
{
while (true)
{
char c1 = tolower(*s1++),
c2 = tolower(*s2++);
if (c1 < c2) return -1;
if (c1 > c2) return 1;
if (c1 == 0) return 0;
}
}
#else
#ifdef NO_STRCASECMP
ANTLR_C_USING(stricmp)
#else
ANTLR_C_USING(strcasecmp)
#endif
#endif
CharScannerLiteralsLess::CharScannerLiteralsLess(const CharScanner* theScanner)
: scanner(theScanner)
{}
bool CharScannerLiteralsLess::operator() (const ANTLR_USE_NAMESPACE(std)string& x,const ANTLR_USE_NAMESPACE(std)string& y) const
{
if (scanner->getCaseSensitiveLiterals()) {
return ANTLR_USE_NAMESPACE(std)less<ANTLR_USE_NAMESPACE(std)string>()(x,y);
} else {
#ifdef NO_STRCASECMP
return (stricmp(x.c_str(),y.c_str())<0);
#else
return (strcasecmp(x.c_str(),y.c_str())<0);
#endif
}
}
CharScanner::CharScanner(InputBuffer& cb)
: saveConsumedInput(true) //, caseSensitiveLiterals(true)
, literals(CharScannerLiteralsLess(this))
, inputState(new LexerInputState(cb))
, commitToPath(false)
, traceDepth(0)
{
setTokenObjectFactory(&CommonToken::factory);
}
CharScanner::CharScanner(InputBuffer* cb)
: saveConsumedInput(true) //, caseSensitiveLiterals(true)
, literals(CharScannerLiteralsLess(this))
, inputState(new LexerInputState(cb))
, commitToPath(false)
, traceDepth(0)
{
setTokenObjectFactory(&CommonToken::factory);
}
CharScanner::CharScanner(const LexerSharedInputState& state)
: saveConsumedInput(true) //, caseSensitiveLiterals(true)
, literals(CharScannerLiteralsLess(this))
, inputState(state)
, commitToPath(false)
, traceDepth(0)
{
setTokenObjectFactory(&CommonToken::factory);
}
CharScanner::~CharScanner()
{
}
void CharScanner::append(char c)
{
if (saveConsumedInput) {
int l = text.length();
if ((l%256) == 0) text.reserve(l+256);
text.replace(l,0,&c,1);
}
}
void CharScanner::append(const ANTLR_USE_NAMESPACE(std)string& s)
{
if (saveConsumedInput)
text+=s;
}
void CharScanner::commit()
{
inputState->getInput().commit();
}
void CharScanner::consume()
{
if (inputState->guessing == 0) {
int c = LA(1);
if (caseSensitive) {
append(c);
} else {
// use input.LA(), not LA(), to get original case
// CharScanner.LA() would toLower it.
append(inputState->getInput().LA(1));
}
if (c == '\t') {
tab();
}
else {
inputState->column++;
}
}
inputState->getInput().consume();
}
/** Consume chars until one matches the given char */
void CharScanner::consumeUntil(int c)
{
while (LA(1) != EOF_CHAR && LA(1) != c)
{
consume();
}
}
/** Consume chars until one matches the given set */
void CharScanner::consumeUntil(const BitSet& set)
{
while (LA(1) != EOF_CHAR && !set.member(LA(1))) {
consume();
}
}
bool CharScanner::getCaseSensitive() const
{ return caseSensitive; }
//bool CharScanner::getCaseSensitiveLiterals() const
//{ return caseSensitiveLiterals; }
int CharScanner::getColumn() const
{ return inputState->column; }
void CharScanner::setColumn(int c)
{ inputState->column = c; }
bool CharScanner::getCommitToPath() const
{ return commitToPath; }
const ANTLR_USE_NAMESPACE(std)string& CharScanner::getFilename() const
{ return inputState->filename; }
InputBuffer& CharScanner::getInputBuffer()
{ return inputState->getInput(); }
LexerSharedInputState CharScanner::getInputState()
{ return inputState; }
int CharScanner::getLine() const
{ return inputState->line; }
/** return a copy of the current text buffer */
const ANTLR_USE_NAMESPACE(std)string& CharScanner::getText() const
{ return text; }
RefToken CharScanner::getTokenObject() const
{ return _returnToken; }
RefToken CharScanner::makeToken(int t)
{
RefToken tok=tokenFactory();
tok->setType(t);
tok->setColumn(inputState->tokenStartColumn);
tok->setLine(inputState->tokenStartLine);
return tok;
}
int CharScanner::mark()
{
return inputState->getInput().mark();
}
void CharScanner::match(int c)
{
if ( LA(1) != c ) {
throw MismatchedCharException(LA(1),c,false,this);
}
consume();
}
void CharScanner::match(const BitSet& b)
{
if (!b.member(LA(1))) {
throw MismatchedCharException(LA(1),b,false,this);
}
consume();
}
void CharScanner::match(const ANTLR_USE_NAMESPACE(std)string& s)
{
int len = s.length();
for (int i=0; i<len; i++) {
if ( LA(1) != s[i] ) {
throw MismatchedCharException(LA(1),s[i],false,this);
}
consume();
}
}
void CharScanner::matchNot(int c)
{
if ( LA(1) == c ) {
throw MismatchedCharException(LA(1),c,true,this);
}
consume();
}
void CharScanner::matchRange(int c1, int c2)
{
if (LA(1)<c1 || LA(1)>c2) {
throw MismatchedCharException(LA(1),c1,c2,false,this);
}
consume();
}
void CharScanner::newline()
{
++inputState->line;
inputState->column=1;
}
/** advance the current column number by an appropriate amount.
* If you do not override this to specify how much to jump for
* a tab, then tabs are counted as one char. This method is
* called from consume().
*/
void CharScanner::tab() {
// update inputState->column as function of
// inputState->column and tab stops.
// For example, if tab stops are columns 1 and 5 etc...
// and column is 3, then add 2 to column.
++inputState->column;
}
void CharScanner::panic()
{
ANTLR_USE_NAMESPACE(std)cerr << "CharScanner: panic" << ANTLR_USE_NAMESPACE(std)endl;
exit(1);
}
void CharScanner::panic(const ANTLR_USE_NAMESPACE(std)string& s)
{
ANTLR_USE_NAMESPACE(std)cerr << "CharScanner: panic: " << s.c_str() << ANTLR_USE_NAMESPACE(std)endl;
exit(1);
}
/** Report exception errors caught in nextToken() */
void CharScanner::reportError(const RecognitionException& ex)
{
ANTLR_USE_NAMESPACE(std)cerr << ex.toString().c_str() << ANTLR_USE_NAMESPACE(std)endl;
}
/** Parser error-reporting function can be overridden in subclass */
void CharScanner::reportError(const ANTLR_USE_NAMESPACE(std)string& s)
{
if (getFilename().empty())
ANTLR_USE_NAMESPACE(std)cerr << "error: " << s.c_str() << ANTLR_USE_NAMESPACE(std)endl;
else
ANTLR_USE_NAMESPACE(std)cerr << getFilename().c_str() << ": error: " << s.c_str() << ANTLR_USE_NAMESPACE(std)endl;
}
/** Parser warning-reporting function can be overridden in subclass */
void CharScanner::reportWarning(const ANTLR_USE_NAMESPACE(std)string& s)
{
if (getFilename().empty())
ANTLR_USE_NAMESPACE(std)cerr << "warning: " << s.c_str() << ANTLR_USE_NAMESPACE(std)endl;
else
ANTLR_USE_NAMESPACE(std)cerr << getFilename().c_str() << ": warning: " << s.c_str() << ANTLR_USE_NAMESPACE(std)endl;
}
void CharScanner::resetText()
{
text="";
inputState->tokenStartColumn = inputState->column;
inputState->tokenStartLine = inputState->line;
}
void CharScanner::rewind(int pos)
{
inputState->getInput().rewind(pos);
}
void CharScanner::setCaseSensitive(bool t)
{
caseSensitive = t;
}
void CharScanner::setCommitToPath(bool commit)
{
commitToPath = commit;
}
void CharScanner::setFilename(const ANTLR_USE_NAMESPACE(std)string& f)
{ inputState->filename=f; }
void CharScanner::setInputState(LexerSharedInputState state)
{ inputState = state; }
void CharScanner::setLine(int l)
{ inputState->line=l; }
void CharScanner::setText(const ANTLR_USE_NAMESPACE(std)string& s)
{ text=s; }
void CharScanner::setTokenObjectFactory(factory_type factory)
{ tokenFactory=factory; }
/** Test the token text against the literals table
* Override this method to perform a different literals test */
int CharScanner::testLiteralsTable(int ttype) const
{
ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,int,CharScannerLiteralsLess>::const_iterator i = literals.find(text);
if (i != literals.end())
ttype = (*i).second;
return ttype;
}
/** Test the text passed in against the literals table
* Override this method to perform a different literals test
* This is used primarily when you want to test a portion of
* a token.
*/
int CharScanner::testLiteralsTable(const ANTLR_USE_NAMESPACE(std)string& text_, int ttype) const
{
ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,int,CharScannerLiteralsLess>::const_iterator i = literals.find(text_);
if (i != literals.end())
ttype = (*i).second;
return ttype;
}
/** Override this method to get more specific case handling */
int CharScanner::toLower(int c) const
{
return tolower(c);
}
void CharScanner::traceIndent()
{
for( int i = 0; i < traceDepth; i++ )
ANTLR_USE_NAMESPACE(std)cout << " ";
}
void CharScanner::traceIn(const ANTLR_USE_NAMESPACE(std)string& rname)
{
traceDepth++;
traceIndent();
ANTLR_USE_NAMESPACE(std)cout << "> lexer " << rname.c_str() << "; c==" << LA(1) << ANTLR_USE_NAMESPACE(std)endl;
}
void CharScanner::traceOut(const ANTLR_USE_NAMESPACE(std)string& rname)
{
traceIndent();
ANTLR_USE_NAMESPACE(std)cout << "< lexer " << rname.c_str() << "; c==" << LA(1) << ANTLR_USE_NAMESPACE(std)endl;
traceDepth--;
}
void CharScanner::uponEOF()
{
}
#ifndef NO_STATIC_CONSTS
const int CharScanner::NO_CHAR;
const int CharScanner::EOF_CHAR;
#endif
ANTLR_END_NAMESPACE