You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

652 lines
15 KiB

/* -*- c++ -*-
This file is part of KSieve,
the KDE internet mail/usenet news message filtering library.
Copyright (c) 2002-2003 Marc Mutz <>
KSieve is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License, version 2, as
published by the Free Software Foundation.
KSieve is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
In addition, as a special exception, the copyright holders give
permission to link the code of this program with any edition of
the Qt library by Trolltech AS, Norway (or with modified versions
of Qt that use the same license as Qt), and distribute linked
combinations including the two. You must obey the GNU General
Public License in all respects for all of the code used other than
Qt. If you modify this file, you may extend this exception to
your version of the file, but you are not obligated to do so. If
you do not wish to do so, delete this exception statement from
your version.
*/
#include <config.h>
#include <ksieve/parser.h>
#include <impl/parser.h>
#include <ksieve/error.h>
#include <tqstring.h>
#include <assert.h>
#include <limits.h> // ULONG_MAX
#include <ctype.h> // isdigit
namespace KSieve {
// Parser Bridge implementation
// Builds the public parser facade. All work is delegated to a
// heap-allocated Impl that is created over the same cursor range and
// options.
Parser::Parser( const char * scursor, const char * const send, int options )
  : i( new Impl( scursor, send, options ) )
{
}
// Destroys the private implementation and zeroes the pointer.
Parser::~Parser() {
  delete i;
  i = 0;
}
// Stores the given builder pointer in the implementation object.
// A null builder is accepted here; the implementation checks
// scriptBuilder() before each callback.
void Parser::setScriptBuilder( ScriptBuilder * builder ) {
  assert( i );
  i->mBuilder = builder;
}
// Returns the builder pointer currently stored in the implementation
// object (may be null).
ScriptBuilder * Parser::scriptBuilder() const {
  assert( i );
  return i->mBuilder;
}
// Forwards to the implementation's error() accessor.
const Error & Parser::error() const {
  assert( i );
  return i->error();
}
// Runs the parser by delegating to the implementation and hands its
// result back unchanged.
bool Parser::parse() {
  assert( i );
  const bool ok = i->parse();
  return ok;
}
// Maps a number quantifier character to its multiplier:
//   'k' / 'K' -> 1024
//   'm' / 'M' -> 1024 * 1024
//   'g' / 'G' -> 1024 * 1024 * 1024
// Any other character is a caller bug: it trips the assertion in debug
// builds and yields the neutral factor 1 when assertions are disabled.
static inline unsigned long factorForQuantifier( char ch ) {
  switch ( ch ) {
  case 'g':
  case 'G':
    return 1024UL * 1024UL * 1024UL;
  case 'm':
  case 'M':
    return 1024UL * 1024UL;
  case 'k':
  case 'K':
    return 1024UL;
  default:
    // Without this label the two statements below were unreachable and
    // an unknown quantifier ran off the end of the function.
    assert( 0 ); // lexer should prohibit this
    return 1; // make compiler happy
  }
}
// Returns true iff computing  10 * result + add  would exceed ULONG_MAX.
//
// The bound is computed with integer division on purpose: going through
// double (ULONG_MAX / 10.0) rounds *up* when unsigned long is 64 bits
// wide, which let some overflowing values of `result` through and made
// the subsequent `10 * result` wrap around.
static inline bool willOverflowULong( unsigned long result, unsigned long add ) {
  static const unsigned long maxULongByTen = ULONG_MAX / 10 ;
  // The first test guarantees that 10 * result cannot wrap in the second.
  return result > maxULongByTen || ULONG_MAX - 10 * result < add ;
}
namespace KSieve {
// Parser Implementation
// Sets up the parser state: no current token yet, a lexer over
// [scursor, send) with the given options, and no script builder.
Parser::Impl::Impl( const char * scursor, const char * const send, int options )
  : mToken( Lexer::None ),
    lexer( scursor, send, options ),
    mBuilder( 0 )
{
}
// True iff the current token is one of the two string token kinds
// (quoted or multi-line).
bool Parser::Impl::isStringToken() const {
  switch ( token() ) {
  case Lexer::QuotedString:
  case Lexer::MultiLineString:
    return true;
  default:
    return false;
  }
}
// True iff the current token can start an argument: a string, a
// number, a tag, or the "[" special that opens a string list.
bool Parser::Impl::isArgumentToken() const {
  if ( isStringToken() )
    return true;
  if ( token() == Lexer::Number )
    return true;
  if ( token() == Lexer::Tag )
    return true;
  // among the specials, only the opening bracket qualifies:
  return ( token() == Lexer::Special ) && ( mTokenValue == "[" ) ;
}
// Makes sure a current token is loaded. While no token is held
// (mToken is unset) and the lexer has neither reached its end nor
// reported an error, the next token is fetched into mToken /
// mTokenValue. Comment and line-feed tokens are reported to the script
// builder (if one is set) from inside this loop.
// Returns true iff the lexer reports no error afterwards; a lexer
// error is forwarded to the script builder before returning.
bool Parser::Impl::obtainToken() {
while ( !mToken && !lexer.atEnd() && !lexer.error() ) {
mToken = lexer.nextToken( mTokenValue );
if ( lexer.error() )
// NOTE(review): no statement of its own follows the check above in this
// copy of the file, so the switch below would become its body. Confirm
// against the upstream source.
// comments and line feeds are semantically invisible and may
// appear anywhere, so we handle them here centrally:
switch ( token() ) {
case Lexer::HashComment:
if ( scriptBuilder() )
scriptBuilder()->hashComment( tokenValue() );
// NOTE(review): no statement separates this case from the next label
// here, so as written control falls through. Confirm.
case Lexer::BracketComment:
if ( scriptBuilder() )
scriptBuilder()->bracketComment( tokenValue() );
// NOTE(review): same fall-through question as above.
case Lexer::LineFeeds:
// The token value is read as an unsigned count and iterated that many times.
for ( unsigned int i = 0, end = tokenValue().toUInt() ; i < end ; ++i )
if ( scriptBuilder() ) // better check every iteration, b/c
// we call out to ScriptBuilder,
// where nasty things might happen!
// NOTE(review): the call guarded by the check above is not visible in
// this copy. Confirm against upstream.
default: ; // make compiler happy
// Report a lexer error to the builder, if there is one.
if ( lexer.error() && scriptBuilder() )
scriptBuilder()->error( lexer.error() );
return !lexer.error();
// Top-level parse routine: parses the input as a command list.
// Returns false when parseCommandList() fails, or when input is left
// over afterwards (raised through makeUnexpectedTokenError() with
// Error::ExpectedCommand).
bool Parser::Impl::parse() {
// this is the entry point: START := command-list
if ( !parseCommandList() )
return false;
// Anything left after the command list is not a command.
if ( !atEnd() ) {
makeUnexpectedTokenError( Error::ExpectedCommand );
return false;
if ( scriptBuilder() )
// NOTE(review): no builder call is visible under the check above in this
// copy, so as written it guards the return below. Confirm against upstream.
return true;
// Parses zero or more commands. Stops successfully at the end of input
// or at the first token that is not an identifier. Returns false if a
// token cannot be obtained or if parseCommand() fails.
bool Parser::Impl::parseCommandList() {
// our ABNF:
// command-list := *command
while ( !atEnd() ) {
if ( !obtainToken() )
return false;
if ( token() == Lexer::None )
// NOTE(review): no statement is visible under the check above in this
// copy, so as written the next `if` becomes its body. Confirm against upstream.
if ( token() != Lexer::Identifier )
return true;
// An identifier starts a command; a failing command must have set an error.
if ( !parseCommand() ) {
assert( error() );
return false;
return true;
// Parses a single command: an identifier, its arguments, an optional
// test or test list, and finally either ";" or a block. The identifier
// is reported to the script builder through commandStart().
// Returns false on any lexer or syntax error. Running out of input
// before the terminator raises Error::MissingSemicolonOrBlock.
bool Parser::Impl::parseCommand() {
// command := identifier arguments ( ";" / block )
// arguments := *argument [ test / test-list ]
// block := "{" *command "}"
// our ABNF:
// block := "{" [ command-list ] "}"
if ( atEnd() )
return false;
// identifier
if ( !obtainToken() || token() != Lexer::Identifier )
return false;
if ( scriptBuilder() )
scriptBuilder()->commandStart( tokenValue() );
// *argument
if ( !obtainToken() )
return false;
if ( atEnd() ) {
makeError( Error::MissingSemicolonOrBlock );
return false;
// Only enter the argument list if the current token can start an argument.
if ( isArgumentToken() && !parseArgumentList() ) {
assert( error() );
return false;
// test / test-list
if ( !obtainToken() )
return false;
if ( atEnd() ) {
makeError( Error::MissingSemicolonOrBlock );
return false;
if ( token() == Lexer::Special && tokenValue() == "(" ) { // test-list
if ( !parseTestList() ) {
assert( error() );
return false;
} else if ( token() == Lexer::Identifier ) { // should be test:
if ( !parseTest() ) {
assert( error() );
return false;
// ";" / block
if ( !obtainToken() )
return false;
if ( atEnd() ) {
makeError( Error::MissingSemicolonOrBlock );
return false;
// The terminator must be a "special" token: either ";" or "{".
if ( token() != Lexer::Special ) {
makeUnexpectedTokenError( Error::ExpectedBlockOrSemicolon );
return false;
if ( tokenValue() == ";" )
// NOTE(review): the statement for the ";" branch is not visible in this
// copy. Confirm against upstream.
else if ( tokenValue() == "{" ) { // block
if ( !parseBlock() )
return false; // it's an error since we saw '{'
} else {
makeError( Error::MissingSemicolonOrBlock );
return false;
if ( scriptBuilder() )
// NOTE(review): no builder call is visible under the check above in this
// copy, so as written it guards the return below. Confirm against upstream.
return true;
// Parses zero or more arguments. Ends successfully at the end of input
// or at the first token that cannot start an argument. Returns false
// if a token cannot be obtained. If parseArgument() declines, the
// result is true exactly when no error has been recorded.
bool Parser::Impl::parseArgumentList() {
  // our ABNF:
  // argument-list := *argument

  while ( !atEnd() ) {
    if ( !obtainToken() )
      return false;
    if ( !isArgumentToken() )
      break; // not ours - leave it for the caller
    if ( !parseArgument() )
      return !error();
  }
  return true;
}
// Parses a single argument and reports it to the script builder:
// numbers go through parseNumber(), tags through taggedArgument(),
// strings through stringArgument(), and "[" starts a string list
// handled by parseStringList(). Returns false if no token is
// available or the current token is none of the above.
//
// NOTE(review): nothing in this function clears the current token after
// a tag or string has been reported. Verify where the token gets consumed.
bool Parser::Impl::parseArgument() {
  // argument := string-list / number / tag

  if ( !obtainToken() || atEnd() )
    return false;

  // number
  if ( token() == Lexer::Number ) {
    if ( parseNumber() )
      return true;
    assert( error() );
    return false;
  }

  // tag
  if ( token() == Lexer::Tag ) {
    if ( scriptBuilder() )
      scriptBuilder()->taggedArgument( tokenValue() );
    return true;
  }

  // single string
  if ( isStringToken() ) {
    if ( scriptBuilder() ) {
      const bool multiLine = token() == Lexer::MultiLineString ;
      scriptBuilder()->stringArgument( tokenValue(), multiLine, TQString::null );
    }
    return true;
  }

  // bracketed string list
  if ( token() == Lexer::Special && tokenValue() == "[" ) {
    if ( parseStringList() )
      return true;
    assert( error() );
    return false;
  }

  return false;
}
// Parses a parenthesised, comma-separated list of tests. The current
// token must be the "(" special, otherwise false is returned without
// raising an error. `lastWasComma` tracks whether a test is expected
// next: it starts out true, is cleared before each test and set again
// on each ",". Reaching the end of input inside the list raises
// Error::PrematureEndOfTestList.
bool Parser::Impl::parseTestList() {
// test-list := "(" test *("," test) ")"
if ( !obtainToken() || atEnd() )
return false;
if ( token() != Lexer::Special || tokenValue() != "(" )
return false;
if ( scriptBuilder() )
// NOTE(review): no builder call is visible under the check above in this
// copy, so as written it guards the declaration below. Confirm against upstream.
// generic while/switch construct for comma-separated lists. See
// parseStringList() for another one. Any fix here is likely to apply there, too.
bool lastWasComma = true;
while ( !atEnd() ) {
if ( !obtainToken() )
return false;
switch ( token() ) {
case Lexer::None:
// NOTE(review): no statement is visible for this case, so as written it
// falls through into the Special case. Confirm.
case Lexer::Special:
// Specials are expected to be exactly one non-NUL Latin-1 character.
assert( tokenValue().length() == 1 );
assert( tokenValue()[0].latin1() );
switch ( tokenValue()[0].latin1() ) {
case ')':
// A ")" directly after "(" or "," means a test is missing.
if ( lastWasComma ) {
makeError( Error::ConsecutiveCommasInTestList );
return false;
if ( scriptBuilder() )
// NOTE(review): guarded builder call not visible in this copy.
return true;
case ',':
if( lastWasComma ) {
makeError( Error::ConsecutiveCommasInTestList );
return false;
lastWasComma = true;
// NOTE(review): the error raised next is a string-list code, although
// this is the test-list parser. It looks like a copy from
// parseStringList(); Error::NonTestInTestList (used further down) may
// have been intended. Confirm.
makeError( Error::NonStringInStringList );
return false;
case Lexer::Identifier:
// An identifier starts a test; two tests in a row need a comma between them.
if ( !lastWasComma ) {
makeError( Error::MissingCommaInTestList );
return false;
} else {
lastWasComma = false;
if ( !parseTest() ) {
assert( error() );
return false;
makeUnexpectedTokenError( Error::NonTestInTestList );
return false;
makeError( Error::PrematureEndOfTestList );
return false;
// Parses a single test: an identifier, its arguments, and optionally a
// nested test or test list. The identifier is reported to the script
// builder through testStart(). Unlike a command, a test may end at the
// end of input after the identifier or after the arguments. Returns
// false if no identifier is available or if a sub-parser fails.
bool Parser::Impl::parseTest() {
// test := identifier arguments
// arguments := *argument [ test / test-list ]
// identifier
if ( !obtainToken() || atEnd() )
return false;
if ( token() != Lexer::Identifier )
return false;
if ( scriptBuilder() )
scriptBuilder()->testStart( tokenValue() );
// *argument
if ( !obtainToken() )
return false;
// NOTE(review): the TestEnd label targeted by the gotos below is not
// visible in this copy of the file. Confirm against upstream.
if ( atEnd() ) // a test w/o args
goto TestEnd;
if ( isArgumentToken() && !parseArgumentList() ) {
assert( error() );
return false;
// test / test-list
if ( !obtainToken() )
return false;
if ( atEnd() ) // a test w/o nested tests
goto TestEnd;
if ( token() == Lexer::Special && tokenValue() == "(" ) { // test-list
if ( !parseTestList() ) {
assert( error() );
return false;
} else if ( token() == Lexer::Identifier ) { // should be test:
if ( !parseTest() ) {
assert( error() );
return false;
if ( scriptBuilder() )
// NOTE(review): no builder call is visible under the check above in this
// copy, so as written it guards the return below. Confirm against upstream.
return true;
// Parses a block: "{", an optional command list, "}". The current
// token must be the "{" special, otherwise false is returned without
// raising an error. Running out of input inside the block raises
// Error::PrematureEndOfBlock; any token other than "}" after the
// command list raises Error::NonCommandInCommandList.
bool Parser::Impl::parseBlock() {
// our ABNF:
// block := "{" [ command-list ] "}"
if ( !obtainToken() || atEnd() )
return false;
if ( token() != Lexer::Special || tokenValue() != "{" )
return false;
if ( scriptBuilder() )
// NOTE(review): no builder call is visible under the check above in this
// copy, so as written it guards the next `if`. Confirm against upstream.
if ( !obtainToken() )
return false;
if ( atEnd() ) {
makeError( Error::PrematureEndOfBlock );
return false;
// The command list is optional: only parse it if a command starts here.
if ( token() == Lexer::Identifier ) {
if ( !parseCommandList() ) {
assert( error() );
return false;
if ( !obtainToken() )
return false;
if ( atEnd() ) {
makeError( Error::PrematureEndOfBlock );
return false;
// Whatever follows the command list must be the closing brace.
if ( token() != Lexer::Special || tokenValue() != "}" ) {
makeError( Error::NonCommandInCommandList );
return false;
if ( scriptBuilder() )
// NOTE(review): no builder call is visible under the check above in this
// copy, so as written it guards the return below. Confirm against upstream.
return true;
// Parses a bracketed, comma-separated list of strings. The current
// token must be the "[" special, otherwise false is returned without
// raising an error. Each string is reported to the script builder
// through stringListEntry(). `lastWasComma` tracks whether a string is
// expected next: it starts out true, is cleared after each string and
// set again on each ",". Reaching the end of input inside the list
// raises Error::PrematureEndOfStringList.
bool Parser::Impl::parseStringList() {
// string-list := "[" string *("," string) "]" / string
// ;; if there is only a single string, the brackets are optional
// However, since strings are already handled separately from
// string lists in parseArgument(), our ABNF is modified to:
// string-list := "[" string *("," string) "]"
if ( !obtainToken() || atEnd() )
return false;
if ( token() != Lexer::Special || tokenValue() != "[" )
return false;
if ( scriptBuilder() )
// NOTE(review): no builder call is visible under the check above in this
// copy, so as written it guards the declaration below. Confirm against upstream.
// generic while/switch construct for comma-separated lists. See
// parseTestList() for another one. Any fix here is likely to apply there, too.
bool lastWasComma = true;
while ( !atEnd() ) {
if ( !obtainToken() )
return false;
switch ( token() ) {
case Lexer::None:
// NOTE(review): no statement is visible for this case, so as written it
// falls through into the Special case. Confirm.
case Lexer::Special:
// Specials are expected to be exactly one character long.
assert( tokenValue().length() == 1 );
switch ( tokenValue()[0].latin1() ) {
case ']':
// A "]" directly after "[" or "," means a string is missing.
if ( lastWasComma ) {
makeError( Error::ConsecutiveCommasInStringList );
return false;
if ( scriptBuilder() )
// NOTE(review): guarded builder call not visible in this copy.
return true;
case ',':
if ( lastWasComma ) {
makeError( Error::ConsecutiveCommasInStringList );
return false;
lastWasComma = true;
makeError( Error::NonStringInStringList );
return false;
case Lexer::QuotedString:
case Lexer::MultiLineString:
// Two strings in a row need a comma between them.
if ( !lastWasComma ) {
makeError( Error::MissingCommaInStringList );
return false;
lastWasComma = false;
if ( scriptBuilder() )
scriptBuilder()->stringListEntry( tokenValue(), token() == Lexer::MultiLineString, TQString::null );
makeError( Error::NonStringInStringList );
return false;
makeError( Error::PrematureEndOfStringList );
return false;
// Converts the current Number token into an unsigned long and reports
// it, together with its quantifier character ('\0' if there is none),
// to the script builder through numberArgument().
//
// Returns false if no token is available or the current token is not a
// number. If the value, or the value scaled by its quantifier, does not
// fit into an unsigned long, Error::NumberOutOfRange is raised and
// false is returned.
//
// NOTE(review): nothing in this function clears the current token after
// it has been reported. Verify where the token gets consumed.
bool Parser::Impl::parseNumber() {
  // The lexer returns the number including the quantifier as a
  // single token value. Here, we split it and check that the number
  // is not out of range:

  if ( !obtainToken() || atEnd() )
    return false;

  if ( token() != Lexer::Number )
    return false;

  // number:
  unsigned long result = 0;
  unsigned int i = 0;
  const TQCString s = tokenValue().latin1();

  // isdigit() is only defined for unsigned char values (and EOF), hence the cast.
  for ( const unsigned int len = s.length() ;
        i < len && isdigit( static_cast<unsigned char>( s[i] ) ) ; ++i ) {
    const unsigned long digitValue = s[i] - '0' ;
    if ( willOverflowULong( result, digitValue ) ) {
      makeError( Error::NumberOutOfRange );
      return false;
    } else {
      result *= 10 ; result += digitValue ;
    }
  }

  // optional quantifier:
  char quantifier = '\0';
  if ( i < s.length() ) {
    // at most one trailing character is expected after the digits
    assert( i + 1 == s.length() );
    quantifier = s[i];
    const unsigned long factor = factorForQuantifier( quantifier );
    // Integer division keeps the bound exact. The former comparison in
    // double rounded ULONG_MAX up, so a value one past the limit slipped
    // through and wrapped around in the multiplication below.
    if ( result > ULONG_MAX / factor ) {
      makeError( Error::NumberOutOfRange );
      return false;
    }
    result *= factor;
  }

  if ( scriptBuilder() )
    scriptBuilder()->numberArgument( result, quantifier );
  return true;
}
} // namespace KSieve