You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
1740 lines
47 KiB
1740 lines
47 KiB
/* -*- c++ -*-
|
|
kmime_header_parsing.cpp
|
|
|
|
This file is part of KMime, the KDE internet mail/usenet news message library.
|
|
Copyright (c) 2001-2002 Marc Mutz <mutz@kde.org>
|
|
|
|
KMime is free software; you can redistribute it and/or modify it
|
|
under the terms of the GNU General Public License, version 2, as
|
|
published by the Free Software Foundation.
|
|
|
|
KMime is distributed in the hope that it will be useful, but
|
|
WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this library; if not, write to the Free Software
|
|
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
|
|
In addition, as a special exception, the copyright holders give
|
|
permission to link the code of this library with any edition of
|
|
the TQt library by Trolltech AS, Norway (or with modified versions
|
|
of TQt that use the same license as TQt), and distribute linked
|
|
combinations including the two. You must obey the GNU General
|
|
Public License in all respects for all of the code used other than
|
|
TQt. If you modify this file, you may extend this exception to
|
|
your version of the file, but you are not obligated to do so. If
|
|
you do not wish to do so, delete this exception statement from
|
|
your version.
|
|
*/
|
|
|
|
#include <config.h>
|
|
#include "kmime_header_parsing.h"
|
|
|
|
#include "kmime_codecs.h"
|
|
#include "kmime_util.h"
|
|
#include "kmime_warning.h"
|
|
|
|
#include <kglobal.h>
|
|
#include <kcharsets.h>
|
|
|
|
#include <tqtextcodec.h>
|
|
#include <tqmap.h>
|
|
#include <tqcstring.h>
|
|
#include <tqstringlist.h>
|
|
|
|
#include <ctype.h> // for isdigit
|
|
#include <cassert>
|
|
|
|
using namespace KMime;
|
|
using namespace KMime::Types;
|
|
|
|
namespace KMime {
|
|
|
|
namespace Types {
|
|
|
|
// Renders this addr-spec as "local-part@domain".
//
// Each character of localPart is converted with latin1(). A '.' or any
// character accepted by isAText() is copied through unchanged; any other
// character forces the local part to be emitted as a quoted-string, and
// '"' and '\' are additionally escaped with a backslash.
TQString AddrSpec::asString() const {
  const unsigned int localLen = localPart.length();

  TQString escaped;
  escaped.reserve( localLen + domain.length() + 1 );

  bool mustQuote = false;
  unsigned int pos = 0;
  while ( pos < localLen ) {
    const char c = localPart[pos].latin1();
    ++pos;

    const bool plain = ( c == '.' ) || isAText( c );
    if ( !plain ) {
      mustQuote = true;
      // only these two need a quoted-pair inside a quoted-string
      if ( c == '\\' || c == '"' )
        escaped += '\\';
    }
    escaped += c;
  }

  if ( !mustQuote )
    return escaped + '@' + domain;
  return '"' + escaped + "\"@" + domain;
}
|
|
|
|
}
|
|
|
|
namespace HeaderParsing {
|
|
|
|
// parse the encoded-word (scursor points to after the initial '=')
|
|
bool parseEncodedWord( const char* & scursor, const char * const send,
|
|
TQString & result, TQCString & language ) {
|
|
|
|
// make sure the caller already did a bit of the work.
|
|
assert( *(scursor-1) == '=' );
|
|
|
|
//
|
|
// STEP 1:
|
|
// scan for the charset/language portion of the encoded-word
|
|
//
|
|
|
|
char ch = *scursor++;
|
|
|
|
if ( ch != '?' ) {
|
|
kdDebug() << "first" << endl;
|
|
KMIME_WARN_PREMATURE_END_OF(EncodedWord);
|
|
return false;
|
|
}
|
|
|
|
// remember start of charset (ie. just after the initial "=?") and
|
|
// language (just after the first '*') fields:
|
|
const char * charsetStart = scursor;
|
|
const char * languageStart = 0;
|
|
|
|
// find delimiting '?' (and the '*' separating charset and language
|
|
// tags, if any):
|
|
for ( ; scursor != send ; scursor++ )
|
|
if ( *scursor == '?')
|
|
break;
|
|
else if ( *scursor == '*' && !languageStart )
|
|
languageStart = scursor + 1;
|
|
|
|
// not found? can't be an encoded-word!
|
|
if ( scursor == send || *scursor != '?' ) {
|
|
kdDebug() << "second" << endl;
|
|
KMIME_WARN_PREMATURE_END_OF(EncodedWord);
|
|
return false;
|
|
}
|
|
|
|
// extract the language information, if any (if languageStart is 0,
|
|
// language will be null, too):
|
|
TQCString maybeLanguage( languageStart, scursor - languageStart + 1 /*for NUL*/);
|
|
// extract charset information (keep in mind: the size given to the
|
|
// ctor is one off due to the \0 terminator):
|
|
TQCString maybeCharset( charsetStart, ( languageStart ? languageStart : scursor + 1 ) - charsetStart );
|
|
|
|
//
|
|
// STEP 2:
|
|
// scan for the encoding portion of the encoded-word
|
|
//
|
|
|
|
|
|
// remember start of encoding (just _after_ the second '?'):
|
|
scursor++;
|
|
const char * encodingStart = scursor;
|
|
|
|
// find next '?' (ending the encoding tag):
|
|
for ( ; scursor != send ; scursor++ )
|
|
if ( *scursor == '?' ) break;
|
|
|
|
// not found? Can't be an encoded-word!
|
|
if ( scursor == send || *scursor != '?' ) {
|
|
kdDebug() << "third" << endl;
|
|
KMIME_WARN_PREMATURE_END_OF(EncodedWord);
|
|
return false;
|
|
}
|
|
|
|
// extract the encoding information:
|
|
TQCString maybeEncoding( encodingStart, scursor - encodingStart + 1 );
|
|
|
|
|
|
kdDebug() << "parseEncodedWord: found charset == \"" << maybeCharset
|
|
<< "\"; language == \"" << maybeLanguage
|
|
<< "\"; encoding == \"" << maybeEncoding << "\"" << endl;
|
|
|
|
//
|
|
// STEP 3:
|
|
// scan for encoded-text portion of encoded-word
|
|
//
|
|
|
|
|
|
// remember start of encoded-text (just after the third '?'):
|
|
scursor++;
|
|
const char * encodedTextStart = scursor;
|
|
|
|
// find next '?' (ending the encoded-text):
|
|
for ( ; scursor != send ; scursor++ )
|
|
if ( *scursor == '?' ) break;
|
|
|
|
// not found? Can't be an encoded-word!
|
|
// ### maybe evaluate it nonetheless if the rest is OK?
|
|
if ( scursor == send || *scursor != '?' ) {
|
|
kdDebug() << "fourth" << endl;
|
|
KMIME_WARN_PREMATURE_END_OF(EncodedWord);
|
|
return false;
|
|
}
|
|
scursor++;
|
|
// check for trailing '=':
|
|
if ( scursor == send || *scursor != '=' ) {
|
|
kdDebug() << "fifth" << endl;
|
|
KMIME_WARN_PREMATURE_END_OF(EncodedWord);
|
|
return false;
|
|
}
|
|
scursor++;
|
|
|
|
// set end sentinel for encoded-text:
|
|
const char * const encodedTextEnd = scursor - 2;
|
|
|
|
//
|
|
// STEP 4:
|
|
// setup decoders for the transfer encoding and the charset
|
|
//
|
|
|
|
|
|
// try if there's a codec for the encoding found:
|
|
Codec * codec = Codec::codecForName( maybeEncoding );
|
|
if ( !codec ) {
|
|
KMIME_WARN_UNKNOWN(Encoding,maybeEncoding);
|
|
return false;
|
|
}
|
|
|
|
// get an instance of a corresponding decoder:
|
|
Decoder * dec = codec->makeDecoder();
|
|
assert( dec );
|
|
|
|
// try if there's a (text)codec for the charset found:
|
|
bool matchOK = false;
|
|
TQTextCodec
|
|
*textCodec = KGlobal::charsets()->codecForName( maybeCharset, matchOK );
|
|
|
|
if ( !matchOK || !textCodec ) {
|
|
KMIME_WARN_UNKNOWN(Charset,maybeCharset);
|
|
delete dec;
|
|
return false;
|
|
};
|
|
|
|
kdDebug() << "mimeName(): \"" << textCodec->mimeName() << "\"" << endl;
|
|
|
|
// allocate a temporary buffer to store the 8bit text:
|
|
int encodedTextLength = encodedTextEnd - encodedTextStart;
|
|
TQByteArray buffer( codec->maxDecodedSizeFor( encodedTextLength ) );
|
|
TQByteArray::Iterator bit = buffer.begin();
|
|
TQByteArray::ConstIterator bend = buffer.end();
|
|
|
|
//
|
|
// STEP 5:
|
|
// do the actual decoding
|
|
//
|
|
|
|
if ( !dec->decode( encodedTextStart, encodedTextEnd, bit, bend ) )
|
|
KMIME_WARN << codec->name() << " codec lies about it's maxDecodedSizeFor( "
|
|
<< encodedTextLength << " )\nresult may be truncated" << endl;
|
|
|
|
result = textCodec->toUnicode( buffer.begin(), bit - buffer.begin() );
|
|
|
|
kdDebug() << "result now: \"" << result << "\"" << endl;
|
|
// cleanup:
|
|
delete dec;
|
|
language = maybeLanguage;
|
|
|
|
return true;
|
|
}
|
|
|
|
// Advances scursor past any run of ' ', '\t', '\r' and '\n', stopping at
// the first other character or at send, whichever comes first.
static inline void eatWhiteSpace( const char* & scursor, const char * const send ) {
  for ( ; scursor != send ; ++scursor ) {
    switch ( *scursor ) {
    case ' ':
    case '\t':
    case '\r':
    case '\n':
      continue;   // still whitespace: keep skipping
    default:
      return;     // first non-whitespace char: leave cursor on it
    }
  }
}
|
|
|
|
bool parseAtom( const char * & scursor, const char * const send,
|
|
TQString & result, bool allow8Bit )
|
|
{
|
|
TQPair<const char*,int> maybeResult;
|
|
|
|
if ( parseAtom( scursor, send, maybeResult, allow8Bit ) ) {
|
|
result += TQString::fromLatin1( maybeResult.first, maybeResult.second );
|
|
return true;
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
bool parseAtom( const char * & scursor, const char * const send,
|
|
TQPair<const char*,int> & result, bool allow8Bit ) {
|
|
bool success = false;
|
|
const char * start = scursor;
|
|
|
|
while ( scursor != send ) {
|
|
signed char ch = *scursor++;
|
|
if ( ch > 0 && isAText(ch) ) {
|
|
// AText: OK
|
|
success = true;
|
|
} else if ( allow8Bit && ch < 0 ) {
|
|
// 8bit char: not OK, but be tolerant.
|
|
KMIME_WARN_8BIT(ch);
|
|
success = true;
|
|
} else {
|
|
// CTL or special - marking the end of the atom:
|
|
// re-set sursor to point to the offending
|
|
// char and return:
|
|
scursor--;
|
|
break;
|
|
}
|
|
}
|
|
result.first = start;
|
|
result.second = scursor - start;
|
|
return success;
|
|
}
|
|
|
|
bool parseToken( const char * & scursor, const char * const send,
|
|
TQString & result, bool allow8Bit )
|
|
{
|
|
TQPair<const char*,int> maybeResult;
|
|
|
|
if ( parseToken( scursor, send, maybeResult, allow8Bit ) ) {
|
|
result += TQString::fromLatin1( maybeResult.first, maybeResult.second );
|
|
return true;
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
bool parseToken( const char * & scursor, const char * const send,
|
|
TQPair<const char*,int> & result, bool allow8Bit )
|
|
{
|
|
bool success = false;
|
|
const char * start = scursor;
|
|
|
|
while ( scursor != send ) {
|
|
signed char ch = *scursor++;
|
|
if ( ch > 0 && isTText(ch) ) {
|
|
// TText: OK
|
|
success = true;
|
|
} else if ( allow8Bit && ch < 0 ) {
|
|
// 8bit char: not OK, but be tolerant.
|
|
KMIME_WARN_8BIT(ch);
|
|
success = true;
|
|
} else {
|
|
// CTL or tspecial - marking the end of the atom:
|
|
// re-set sursor to point to the offending
|
|
// char and return:
|
|
scursor--;
|
|
break;
|
|
}
|
|
}
|
|
result.first = start;
|
|
result.second = scursor - start;
|
|
return success;
|
|
}
|
|
|
|
// Reads the next input char into the local variable "ch" and advances
// "scursor"; if the input is exhausted ("scursor == send") it emits a
// premature-end warning and makes the enclosing function return false.
// Expects "ch", "scursor" and "send" to be in scope at the point of use.
#define READ_ch_OR_FAIL if ( scursor != send ) { \
    ch = *scursor++; \
  } else { \
    KMIME_WARN_PREMATURE_END_OF(GenericQuotedString); \
    return false; \
  }
|
|
|
|
// known issues:
|
|
//
|
|
// - doesn't handle quoted CRLF
|
|
|
|
bool parseGenericQuotedString( const char* & scursor, const char * const send,
|
|
TQString & result, bool isCRLF,
|
|
const char openChar, const char closeChar )
|
|
{
|
|
char ch;
|
|
// We are in a quoted-string or domain-literal or comment and the
|
|
// cursor points to the first char after the openChar.
|
|
// We will apply unfolding and quoted-pair removal.
|
|
// We return when we either encounter the end or unescaped openChar
|
|
// or closeChar.
|
|
|
|
assert( *(scursor-1) == openChar || *(scursor-1) == closeChar );
|
|
|
|
while ( scursor != send ) {
|
|
ch = *scursor++;
|
|
|
|
if ( ch == closeChar || ch == openChar ) {
|
|
// end of quoted-string or another opening char:
|
|
// let caller decide what to do.
|
|
return true;
|
|
}
|
|
|
|
switch( ch ) {
|
|
case '\\': // quoted-pair
|
|
// misses "\" CRLF LWSP-char handling, see rfc822, 3.4.5
|
|
READ_ch_OR_FAIL;
|
|
KMIME_WARN_IF_8BIT(ch);
|
|
result += TQChar(ch);
|
|
break;
|
|
case '\r':
|
|
// ###
|
|
// The case of lonely '\r' is easy to solve, as they're
|
|
// not part of Unix Line-ending conventions.
|
|
// But I see a problem if we are given Unix-native
|
|
// line-ending-mails, where we cannot determine anymore
|
|
// whether a given '\n' was part of a CRLF or was occurring
|
|
// on it's own.
|
|
READ_ch_OR_FAIL;
|
|
if ( ch != '\n' ) {
|
|
// CR on it's own...
|
|
KMIME_WARN_LONE(CR);
|
|
result += TQChar('\r');
|
|
scursor--; // points to after the '\r' again
|
|
} else {
|
|
// CRLF encountered.
|
|
// lookahead: check for folding
|
|
READ_ch_OR_FAIL;
|
|
if ( ch == ' ' || ch == '\t' ) {
|
|
// correct folding;
|
|
// position cursor behind the CRLF WSP (unfolding)
|
|
// and add the WSP to the result
|
|
result += TQChar(ch);
|
|
} else {
|
|
// this is the "shouldn't happen"-case. There is a CRLF
|
|
// inside a quoted-string without it being part of FWS.
|
|
// We take it verbatim.
|
|
KMIME_WARN_NON_FOLDING(CRLF);
|
|
result += "\r\n";
|
|
// the cursor is decremented again, so's we need not
|
|
// duplicate the whole switch here. "ch" could've been
|
|
// everything (incl. openChar or closeChar).
|
|
scursor--;
|
|
}
|
|
}
|
|
break;
|
|
case '\n':
|
|
// Note: CRLF has been handled above already!
|
|
// ### LF needs special treatment, depending on whether isCRLF
|
|
// is true (we can be sure a lonely '\n' was meant this way) or
|
|
// false ('\n' alone could have meant LF or CRLF in the original
|
|
// message. This parser assumes CRLF iff the LF is followed by
|
|
// either WSP (folding) or NULL (premature end of quoted-string;
|
|
// Should be fixed, since NULL is allowed as per rfc822).
|
|
READ_ch_OR_FAIL;
|
|
if ( !isCRLF && ( ch == ' ' || ch == '\t' ) ) {
|
|
// folding
|
|
// correct folding
|
|
result += TQChar(ch);
|
|
} else {
|
|
// non-folding
|
|
KMIME_WARN_LONE(LF);
|
|
result += TQChar('\n');
|
|
// pos is decremented, so's we need not duplicate the whole
|
|
// switch here. ch could've been everything (incl. <">, "\").
|
|
scursor--;
|
|
}
|
|
break;
|
|
default:
|
|
KMIME_WARN_IF_8BIT(ch);
|
|
result += TQChar(ch);
|
|
}
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
// known issues:
|
|
//
|
|
// - doesn't handle encoded-word inside comments.
|
|
|
|
bool parseComment( const char* & scursor, const char * const send,
|
|
TQString & result, bool isCRLF, bool reallySave )
|
|
{
|
|
int commentNestingDepth = 1;
|
|
const char * afterLastClosingParenPos = 0;
|
|
TQString maybeCmnt;
|
|
const char * oldscursor = scursor;
|
|
|
|
assert( *(scursor-1) == '(' );
|
|
|
|
while ( commentNestingDepth ) {
|
|
TQString cmntPart;
|
|
if ( parseGenericQuotedString( scursor, send, cmntPart, isCRLF, '(', ')' ) ) {
|
|
assert( *(scursor-1) == ')' || *(scursor-1) == '(' );
|
|
// see the kdoc for above function for the possible conditions
|
|
// we have to check:
|
|
switch ( *(scursor-1) ) {
|
|
case ')':
|
|
if ( reallySave ) {
|
|
// add the chunk that's now surely inside the comment.
|
|
result += maybeCmnt;
|
|
result += cmntPart;
|
|
if ( commentNestingDepth > 1 ) // don't add the outermost ')'...
|
|
result += TQChar(')');
|
|
maybeCmnt = TQString();
|
|
}
|
|
afterLastClosingParenPos = scursor;
|
|
--commentNestingDepth;
|
|
break;
|
|
case '(':
|
|
if ( reallySave ) {
|
|
// don't add to "result" yet, because we might find that we
|
|
// are already outside the (broken) comment...
|
|
maybeCmnt += cmntPart;
|
|
maybeCmnt += TQChar('(');
|
|
}
|
|
++commentNestingDepth;
|
|
break;
|
|
default: assert( 0 );
|
|
} // switch
|
|
} else {
|
|
// !parseGenericQuotedString, ie. premature end
|
|
if ( afterLastClosingParenPos )
|
|
scursor = afterLastClosingParenPos;
|
|
else
|
|
scursor = oldscursor;
|
|
return false;
|
|
}
|
|
} // while
|
|
|
|
return true;
|
|
}
|
|
|
|
|
|
// known issues: none.
|
|
|
|
bool parsePhrase( const char* & scursor, const char * const send,
|
|
TQString & result, bool isCRLF )
|
|
{
|
|
enum { None, Phrase, Atom, EncodedWord, QuotedString } found = None;
|
|
TQString tmp;
|
|
TQCString lang;
|
|
const char * successfullyParsed = 0;
|
|
// only used by the encoded-word branch
|
|
const char * oldscursor;
|
|
// used to suppress whitespace between adjacent encoded-words
|
|
// (rfc2047, 6.2):
|
|
bool lastWasEncodedWord = false;
|
|
|
|
while ( scursor != send ) {
|
|
char ch = *scursor++;
|
|
switch ( ch ) {
|
|
case '.': // broken, but allow for intorop's sake
|
|
if ( found == None ) {
|
|
--scursor;
|
|
return false;
|
|
} else {
|
|
if ( scursor != send && ( *scursor == ' ' || *scursor == '\t' ) )
|
|
result += ". ";
|
|
else
|
|
result += '.';
|
|
successfullyParsed = scursor;
|
|
}
|
|
break;
|
|
case '"': // quoted-string
|
|
tmp = TQString();
|
|
if ( parseGenericQuotedString( scursor, send, tmp, isCRLF, '"', '"' ) ) {
|
|
successfullyParsed = scursor;
|
|
assert( *(scursor-1) == '"' );
|
|
switch ( found ) {
|
|
case None:
|
|
found = QuotedString;
|
|
break;
|
|
case Phrase:
|
|
case Atom:
|
|
case EncodedWord:
|
|
case QuotedString:
|
|
found = Phrase;
|
|
result += TQChar(' '); // rfc822, 3.4.4
|
|
break;
|
|
default:
|
|
assert( 0 );
|
|
}
|
|
lastWasEncodedWord = false;
|
|
result += tmp;
|
|
} else {
|
|
// premature end of quoted string.
|
|
// What to do? Return leading '"' as special? Return as quoted-string?
|
|
// We do the latter if we already found something, else signal failure.
|
|
if ( found == None ) {
|
|
return false;
|
|
} else {
|
|
result += TQChar(' '); // rfc822, 3.4.4
|
|
result += tmp;
|
|
return true;
|
|
}
|
|
}
|
|
break;
|
|
case '(': // comment
|
|
// parse it, but ignore content:
|
|
tmp = TQString();
|
|
if ( parseComment( scursor, send, tmp, isCRLF,
|
|
false /*don't bother with the content*/ ) ) {
|
|
successfullyParsed = scursor;
|
|
lastWasEncodedWord = false; // strictly interpreting rfc2047, 6.2
|
|
} else {
|
|
if ( found == None )
|
|
return false;
|
|
else {
|
|
scursor = successfullyParsed;
|
|
return true;
|
|
}
|
|
}
|
|
break;
|
|
case '=': // encoded-word
|
|
tmp = TQString();
|
|
oldscursor = scursor;
|
|
lang = 0;
|
|
if ( parseEncodedWord( scursor, send, tmp, lang ) ) {
|
|
successfullyParsed = scursor;
|
|
switch ( found ) {
|
|
case None:
|
|
found = EncodedWord;
|
|
break;
|
|
case Phrase:
|
|
case EncodedWord:
|
|
case Atom:
|
|
case QuotedString:
|
|
if ( !lastWasEncodedWord )
|
|
result += TQChar(' '); // rfc822, 3.4.4
|
|
found = Phrase;
|
|
break;
|
|
default: assert( 0 );
|
|
}
|
|
lastWasEncodedWord = true;
|
|
result += tmp;
|
|
break;
|
|
} else
|
|
// parse as atom:
|
|
scursor = oldscursor;
|
|
// fall though...
|
|
|
|
default: //atom
|
|
tmp = TQString();
|
|
scursor--;
|
|
if ( parseAtom( scursor, send, tmp, true /* allow 8bit */ ) ) {
|
|
successfullyParsed = scursor;
|
|
switch ( found ) {
|
|
case None:
|
|
found = Atom;
|
|
break;
|
|
case Phrase:
|
|
case Atom:
|
|
case EncodedWord:
|
|
case QuotedString:
|
|
found = Phrase;
|
|
result += TQChar(' '); // rfc822, 3.4.4
|
|
break;
|
|
default:
|
|
assert( 0 );
|
|
}
|
|
lastWasEncodedWord = false;
|
|
result += tmp;
|
|
} else {
|
|
if ( found == None )
|
|
return false;
|
|
else {
|
|
scursor = successfullyParsed;
|
|
return true;
|
|
}
|
|
}
|
|
}
|
|
eatWhiteSpace( scursor, send );
|
|
}
|
|
|
|
return ( found != None );
|
|
}
|
|
|
|
|
|
bool parseDotAtom( const char* & scursor, const char * const send,
|
|
TQString & result, bool isCRLF )
|
|
{
|
|
// always points to just after the last atom parsed:
|
|
const char * successfullyParsed;
|
|
|
|
TQString tmp;
|
|
if ( !parseAtom( scursor, send, tmp, false /* no 8bit */ ) )
|
|
return false;
|
|
result += tmp;
|
|
successfullyParsed = scursor;
|
|
|
|
while ( scursor != send ) {
|
|
eatCFWS( scursor, send, isCRLF );
|
|
|
|
// end of header or no '.' -> return
|
|
if ( scursor == send || *scursor != '.' ) return true;
|
|
scursor++; // eat '.'
|
|
|
|
eatCFWS( scursor, send, isCRLF );
|
|
|
|
if ( scursor == send || !isAText( *scursor ) ) {
|
|
// end of header or no AText, but this time following a '.'!:
|
|
// reset cursor to just after last successfully parsed char and
|
|
// return:
|
|
scursor = successfullyParsed;
|
|
return true;
|
|
}
|
|
|
|
// try to parse the next atom:
|
|
TQString maybeAtom;
|
|
if ( !parseAtom( scursor, send, maybeAtom, false /*no 8bit*/ ) ) {
|
|
scursor = successfullyParsed;
|
|
return true;
|
|
}
|
|
|
|
result += TQChar('.');
|
|
result += maybeAtom;
|
|
successfullyParsed = scursor;
|
|
}
|
|
|
|
scursor = successfullyParsed;
|
|
return true;
|
|
}
|
|
|
|
|
|
void eatCFWS( const char* & scursor, const char * const send, bool isCRLF ) {
|
|
TQString dummy;
|
|
|
|
while ( scursor != send ) {
|
|
const char * oldscursor = scursor;
|
|
|
|
char ch = *scursor++;
|
|
|
|
switch( ch ) {
|
|
case ' ':
|
|
case '\t': // whitespace
|
|
case '\r':
|
|
case '\n': // folding
|
|
continue;
|
|
|
|
case '(': // comment
|
|
if ( parseComment( scursor, send, dummy, isCRLF, false /*don't save*/ ) )
|
|
continue;
|
|
scursor = oldscursor;
|
|
return;
|
|
|
|
default:
|
|
scursor = oldscursor;
|
|
return;
|
|
}
|
|
|
|
}
|
|
}
|
|
|
|
bool parseDomain( const char* & scursor, const char * const send,
|
|
TQString & result, bool isCRLF ) {
|
|
eatCFWS( scursor, send, isCRLF );
|
|
if ( scursor == send ) return false;
|
|
|
|
// domain := dot-atom / domain-literal / atom *("." atom)
|
|
//
|
|
// equivalent to:
|
|
// domain = dot-atom / domain-literal,
|
|
// since parseDotAtom does allow CFWS between atoms and dots
|
|
|
|
if ( *scursor == '[' ) {
|
|
// domain-literal:
|
|
TQString maybeDomainLiteral;
|
|
// eat '[':
|
|
scursor++;
|
|
while ( parseGenericQuotedString( scursor, send, maybeDomainLiteral,
|
|
isCRLF, '[', ']' ) ) {
|
|
if ( scursor == send ) {
|
|
// end of header: check for closing ']':
|
|
if ( *(scursor-1) == ']' ) {
|
|
// OK, last char was ']':
|
|
result = maybeDomainLiteral;
|
|
return true;
|
|
} else {
|
|
// not OK, domain-literal wasn't closed:
|
|
return false;
|
|
}
|
|
}
|
|
// we hit openChar in parseGenericQuotedString.
|
|
// include it in maybeDomainLiteral and keep on parsing:
|
|
if ( *(scursor-1) == '[' ) {
|
|
maybeDomainLiteral += TQChar('[');
|
|
continue;
|
|
}
|
|
// OK, real end of domain-literal:
|
|
result = maybeDomainLiteral;
|
|
return true;
|
|
}
|
|
} else {
|
|
// dot-atom:
|
|
TQString maybeDotAtom;
|
|
if ( parseDotAtom( scursor, send, maybeDotAtom, isCRLF ) ) {
|
|
result = maybeDotAtom;
|
|
return true;
|
|
}
|
|
}
|
|
return false;
|
|
}
|
|
|
|
bool parseObsRoute( const char* & scursor, const char* const send,
|
|
TQStringList & result, bool isCRLF, bool save ) {
|
|
while ( scursor != send ) {
|
|
eatCFWS( scursor, send, isCRLF );
|
|
if ( scursor == send ) return false;
|
|
|
|
// empty entry:
|
|
if ( *scursor == ',' ) {
|
|
scursor++;
|
|
if ( save ) result.append( TQString() );
|
|
continue;
|
|
}
|
|
|
|
// empty entry ending the list:
|
|
if ( *scursor == ':' ) {
|
|
scursor++;
|
|
if ( save ) result.append( TQString() );
|
|
return true;
|
|
}
|
|
|
|
// each non-empty entry must begin with '@':
|
|
if ( *scursor != '@' )
|
|
return false;
|
|
else
|
|
scursor++;
|
|
|
|
TQString maybeDomain;
|
|
if ( !parseDomain( scursor, send, maybeDomain, isCRLF ) ) return false;
|
|
if ( save ) result.append( maybeDomain );
|
|
|
|
// eat the following (optional) comma:
|
|
eatCFWS( scursor, send, isCRLF );
|
|
if ( scursor == send ) return false;
|
|
if ( *scursor == ':' ) { scursor++; return true; }
|
|
if ( *scursor == ',' ) scursor++;
|
|
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
bool parseAddrSpec( const char* & scursor, const char * const send,
|
|
AddrSpec & result, bool isCRLF ) {
|
|
//
|
|
// STEP 1:
|
|
// local-part := dot-atom / quoted-string / word *("." word)
|
|
//
|
|
// this is equivalent to:
|
|
// local-part := word *("." word)
|
|
|
|
TQString maybeLocalPart;
|
|
TQString tmp;
|
|
|
|
while ( scursor != send ) {
|
|
// first, eat any whitespace
|
|
eatCFWS( scursor, send, isCRLF );
|
|
|
|
char ch = *scursor++;
|
|
switch ( ch ) {
|
|
case '.': // dot
|
|
maybeLocalPart += TQChar('.');
|
|
break;
|
|
|
|
case '@':
|
|
goto SAW_AT_SIGN;
|
|
break;
|
|
|
|
case '"': // quoted-string
|
|
tmp = TQString();
|
|
if ( parseGenericQuotedString( scursor, send, tmp, isCRLF, '"', '"' ) )
|
|
maybeLocalPart += tmp;
|
|
else
|
|
return false;
|
|
break;
|
|
|
|
default: // atom
|
|
scursor--; // re-set scursor to point to ch again
|
|
tmp = TQString();
|
|
if ( parseAtom( scursor, send, tmp, false /* no 8bit */ ) )
|
|
maybeLocalPart += tmp;
|
|
else
|
|
return false; // parseAtom can only fail if the first char is non-atext.
|
|
break;
|
|
}
|
|
}
|
|
|
|
return false;
|
|
|
|
|
|
//
|
|
// STEP 2:
|
|
// domain
|
|
//
|
|
|
|
SAW_AT_SIGN:
|
|
|
|
assert( *(scursor-1) == '@' );
|
|
|
|
TQString maybeDomain;
|
|
if ( !parseDomain( scursor, send, maybeDomain, isCRLF ) )
|
|
return false;
|
|
|
|
result.localPart = maybeLocalPart;
|
|
result.domain = maybeDomain;
|
|
|
|
return true;
|
|
}
|
|
|
|
|
|
bool parseAngleAddr( const char* & scursor, const char * const send,
|
|
AddrSpec & result, bool isCRLF ) {
|
|
// first, we need an opening angle bracket:
|
|
eatCFWS( scursor, send, isCRLF );
|
|
if ( scursor == send || *scursor != '<' ) return false;
|
|
scursor++; // eat '<'
|
|
|
|
eatCFWS( scursor, send, isCRLF );
|
|
if ( scursor == send ) return false;
|
|
|
|
if ( *scursor == '@' || *scursor == ',' ) {
|
|
// obs-route: parse, but ignore:
|
|
KMIME_WARN << "obsolete source route found! ignoring." << endl;
|
|
TQStringList dummy;
|
|
if ( !parseObsRoute( scursor, send, dummy,
|
|
isCRLF, false /* don't save */ ) )
|
|
return false;
|
|
// angle-addr isn't complete until after the '>':
|
|
if ( scursor == send ) return false;
|
|
}
|
|
|
|
// parse addr-spec:
|
|
AddrSpec maybeAddrSpec;
|
|
if ( !parseAddrSpec( scursor, send, maybeAddrSpec, isCRLF ) ) return false;
|
|
|
|
eatCFWS( scursor, send, isCRLF );
|
|
if ( scursor == send || *scursor != '>' ) return false;
|
|
scursor++;
|
|
|
|
result = maybeAddrSpec;
|
|
return true;
|
|
|
|
}
|
|
|
|
bool parseMailbox( const char* & scursor, const char * const send,
|
|
Mailbox & result, bool isCRLF ) {
|
|
|
|
// rfc:
|
|
// mailbox := addr-spec / ([ display-name ] angle-addr)
|
|
// us:
|
|
// mailbox := addr-spec / ([ display-name ] angle-addr)
|
|
// / (angle-addr "(" display-name ")")
|
|
|
|
eatCFWS( scursor, send, isCRLF );
|
|
if ( scursor == send ) return false;
|
|
|
|
AddrSpec maybeAddrSpec;
|
|
|
|
// first, try if it's a vanilla addr-spec:
|
|
const char * oldscursor = scursor;
|
|
if ( parseAddrSpec( scursor, send, maybeAddrSpec, isCRLF ) ) {
|
|
result.displayName = TQString();
|
|
result.addrSpec = maybeAddrSpec;
|
|
return true;
|
|
}
|
|
scursor = oldscursor;
|
|
|
|
// second, see if there's a display-name:
|
|
TQString maybeDisplayName;
|
|
if ( !parsePhrase( scursor, send, maybeDisplayName, isCRLF ) ) {
|
|
// failed: reset cursor, note absent display-name
|
|
maybeDisplayName = TQString();
|
|
scursor = oldscursor;
|
|
} else {
|
|
// succeeded: eat CFWS
|
|
eatCFWS( scursor, send, isCRLF );
|
|
if ( scursor == send ) return false;
|
|
}
|
|
|
|
// third, parse the angle-addr:
|
|
if ( !parseAngleAddr( scursor, send, maybeAddrSpec, isCRLF ) )
|
|
return false;
|
|
|
|
if ( maybeDisplayName.isNull() ) {
|
|
// check for the obsolete form of display-name (as comment):
|
|
eatWhiteSpace( scursor, send );
|
|
if ( scursor != send && *scursor == '(' ) {
|
|
scursor++;
|
|
if ( !parseComment( scursor, send, maybeDisplayName, isCRLF, true /*keep*/ ) )
|
|
return false;
|
|
}
|
|
}
|
|
|
|
result.displayName = maybeDisplayName;
|
|
result.addrSpec = maybeAddrSpec;
|
|
return true;
|
|
}
|
|
|
|
bool parseGroup( const char* & scursor, const char * const send,
|
|
Address & result, bool isCRLF ) {
|
|
// group := display-name ":" [ mailbox-list / CFWS ] ";" [CFWS]
|
|
//
|
|
// equivalent to:
|
|
// group := display-name ":" [ obs-mbox-list ] ";"
|
|
|
|
eatCFWS( scursor, send, isCRLF );
|
|
if ( scursor == send ) return false;
|
|
|
|
// get display-name:
|
|
TQString maybeDisplayName;
|
|
if ( !parsePhrase( scursor, send, maybeDisplayName, isCRLF ) )
|
|
return false;
|
|
|
|
// get ":":
|
|
eatCFWS( scursor, send, isCRLF );
|
|
if ( scursor == send || *scursor != ':' ) return false;
|
|
|
|
result.displayName = maybeDisplayName;
|
|
|
|
// get obs-mbox-list (may contain empty entries):
|
|
scursor++;
|
|
while ( scursor != send ) {
|
|
eatCFWS( scursor, send, isCRLF );
|
|
if ( scursor == send ) return false;
|
|
|
|
// empty entry:
|
|
if ( *scursor == ',' ) { scursor++; continue; }
|
|
|
|
// empty entry ending the list:
|
|
if ( *scursor == ';' ) { scursor++; return true; }
|
|
|
|
Mailbox maybeMailbox;
|
|
if ( !parseMailbox( scursor, send, maybeMailbox, isCRLF ) )
|
|
return false;
|
|
result.mailboxList.append( maybeMailbox );
|
|
|
|
eatCFWS( scursor, send, isCRLF );
|
|
// premature end:
|
|
if ( scursor == send ) return false;
|
|
// regular end of the list:
|
|
if ( *scursor == ';' ) { scursor++; return true; }
|
|
// eat regular list entry separator:
|
|
if ( *scursor == ',' ) scursor++;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
|
|
bool parseAddress( const char* & scursor, const char * const send,
|
|
Address & result, bool isCRLF ) {
|
|
// address := mailbox / group
|
|
|
|
eatCFWS( scursor, send, isCRLF );
|
|
if ( scursor == send ) return false;
|
|
|
|
// first try if it's a single mailbox:
|
|
Mailbox maybeMailbox;
|
|
const char * oldscursor = scursor;
|
|
if ( parseMailbox( scursor, send, maybeMailbox, isCRLF ) ) {
|
|
// yes, it is:
|
|
result.displayName = TQString();
|
|
result.mailboxList.append( maybeMailbox );
|
|
return true;
|
|
}
|
|
scursor = oldscursor;
|
|
|
|
Address maybeAddress;
|
|
|
|
// no, it's not a single mailbox. Try if it's a group:
|
|
if ( !parseGroup( scursor, send, maybeAddress, isCRLF ) )
|
|
return false;
|
|
|
|
result = maybeAddress;
|
|
return true;
|
|
}
|
|
|
|
bool parseAddressList( const char* & scursor, const char * const send,
|
|
AddressList & result, bool isCRLF ) {
|
|
while ( scursor != send ) {
|
|
eatCFWS( scursor, send, isCRLF );
|
|
// end of header: this is OK.
|
|
if ( scursor == send ) return true;
|
|
// empty entry: ignore:
|
|
if ( *scursor == ',' ) { scursor++; continue; }
|
|
|
|
// parse one entry
|
|
Address maybeAddress;
|
|
if ( !parseAddress( scursor, send, maybeAddress, isCRLF ) ) return false;
|
|
result.append( maybeAddress );
|
|
|
|
eatCFWS( scursor, send, isCRLF );
|
|
// end of header: this is OK.
|
|
if ( scursor == send ) return true;
|
|
// comma separating entries: eat it.
|
|
if ( *scursor == ',' ) scursor++;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
|
|
// Constant suffixes looked for on parameter names: "*" and "*0".
static TQString asterisk = TQString::fromLatin1( "*" );
static TQString asteriskZero = TQString::fromLatin1( "*0" );
//static TQString asteriskZeroAsterisk = TQString::fromLatin1("*0*",3);
|
|
|
|
bool parseParameter( const char* & scursor, const char * const send,
|
|
TQPair<TQString,TQStringOrTQPair> & result, bool isCRLF ) {
|
|
// parameter = regular-parameter / extended-parameter
|
|
// regular-parameter = regular-parameter-name "=" value
|
|
// extended-parameter =
|
|
// value = token / quoted-string
|
|
//
|
|
// note that rfc2231 handling is out of the scope of this function.
|
|
// Therefore we return the attribute as TQString and the value as
|
|
// (start,length) tupel if we see that the value is encoded
|
|
// (trailing asterisk), for parseParameterList to decode...
|
|
|
|
eatCFWS( scursor, send, isCRLF );
|
|
if ( scursor == send ) return false;
|
|
|
|
//
|
|
// parse the parameter name:
|
|
//
|
|
TQString maybeAttribute;
|
|
if ( !parseToken( scursor, send, maybeAttribute, false /* no 8bit */ ) )
|
|
return false;
|
|
|
|
eatCFWS( scursor, send, isCRLF );
|
|
// premature end: not OK (haven't seen '=' yet).
|
|
if ( scursor == send || *scursor != '=' ) return false;
|
|
scursor++; // eat '='
|
|
|
|
eatCFWS( scursor, send, isCRLF );
|
|
if ( scursor == send ) {
|
|
// don't choke on attribute=, meaning the value was omitted:
|
|
if ( maybeAttribute.endsWith( asterisk ) ) {
|
|
KMIME_WARN << "attribute ends with \"*\", but value is empty! "
|
|
"Chopping away \"*\"." << endl;
|
|
maybeAttribute.truncate( maybeAttribute.length() - 1 );
|
|
}
|
|
result = tqMakePair( maybeAttribute.lower(), TQStringOrTQPair() );
|
|
return true;
|
|
}
|
|
|
|
const char * oldscursor = scursor;
|
|
|
|
//
|
|
// parse the parameter value:
|
|
//
|
|
TQStringOrTQPair maybeValue;
|
|
if ( *scursor == '"' ) {
|
|
// value is a quoted-string:
|
|
scursor++;
|
|
if ( maybeAttribute.endsWith( asterisk ) ) {
|
|
// attributes ending with "*" designate extended-parameters,
|
|
// which cannot have quoted-strings as values. So we remove the
|
|
// trailing "*" to not confuse upper layers.
|
|
KMIME_WARN << "attribute ends with \"*\", but value is a quoted-string! "
|
|
"Chopping away \"*\"." << endl;
|
|
maybeAttribute.truncate( maybeAttribute.length() - 1 );
|
|
}
|
|
|
|
if ( !parseGenericQuotedString( scursor, send, maybeValue.qstring, isCRLF ) ) {
|
|
scursor = oldscursor;
|
|
result = tqMakePair( maybeAttribute.lower(), TQStringOrTQPair() );
|
|
return false; // this case needs further processing by upper layers!!
|
|
}
|
|
} else {
|
|
// value is a token:
|
|
if ( !parseToken( scursor, send, maybeValue.qpair, false /* no 8bit */ ) ) {
|
|
scursor = oldscursor;
|
|
result = tqMakePair( maybeAttribute.lower(), TQStringOrTQPair() );
|
|
return false; // this case needs further processing by upper layers!!
|
|
}
|
|
}
|
|
|
|
result = tqMakePair( maybeAttribute.lower(), maybeValue );
|
|
return true;
|
|
}
|
|
|
|
|
|
|
|
bool parseRawParameterList( const char* & scursor, const char * const send,
|
|
TQMap<TQString,TQStringOrTQPair> & result,
|
|
bool isCRLF ) {
|
|
// we use parseParameter() consecutively to obtain a map of raw
|
|
// attributes to raw values. "Raw" here means that we don't do
|
|
// rfc2231 decoding and concatenation. This is left to
|
|
// parseParameterList(), which will call this function.
|
|
//
|
|
// The main reason for making this chunk of code a separate
|
|
// (private) method is that we can deal with broken parameters
|
|
// _here_ and leave the rfc2231 handling solely to
|
|
// parseParameterList(), which will still be enough work.
|
|
|
|
while ( scursor != send ) {
|
|
eatCFWS( scursor, send, isCRLF );
|
|
// empty entry ending the list: OK.
|
|
if ( scursor == send ) return true;
|
|
// empty list entry: ignore.
|
|
if ( *scursor == ';' ) { scursor++; continue; }
|
|
|
|
TQPair<TQString,TQStringOrTQPair> maybeParameter;
|
|
if ( !parseParameter( scursor, send, maybeParameter, isCRLF ) ) {
|
|
// we need to do a bit of work if the attribute is not
|
|
// NULL. These are the cases marked with "needs further
|
|
// processing" in parseParameter(). Specifically, parsing of the
|
|
// token or the quoted-string, which should represent the value,
|
|
// failed. We take the easy way out and simply search for the
|
|
// next ';' to start parsing again. (Another option would be to
|
|
// take the text between '=' and ';' as value)
|
|
if ( maybeParameter.first.isNull() ) return false;
|
|
while ( scursor != send ) {
|
|
if ( *scursor++ == ';' ) goto IS_SEMICOLON;
|
|
}
|
|
// scursor == send case: end of list.
|
|
return true;
|
|
IS_SEMICOLON:
|
|
// *scursor == ';' case: parse next entry.
|
|
continue;
|
|
}
|
|
// successful parsing brings us here:
|
|
result.insert( maybeParameter.first, maybeParameter.second );
|
|
|
|
eatCFWS( scursor, send, isCRLF );
|
|
// end of header: ends list.
|
|
if ( scursor == send ) return true;
|
|
// regular separator: eat it.
|
|
if ( *scursor == ';' ) scursor++;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
|
|
// Decodes one raw section of an encoded (RFC 2231 style) parameter value
// and appends the resulting text to value.
//
//  rfc2231Codec   - cache for the "x-kmime-rfc2231" codec; looked up on
//                   first use and kept for subsequent calls.
//  textcodec      - charset codec; (re)assigned from the charset prefix
//                   when isContinuation is false, reused as passed in
//                   when it is true.
//  isContinuation - false for an initial section of the form
//                   charset'language'text, true for a section that
//                   consists of text only.
//  value          - receives the decoded text (appended).
//  source         - (start,length) of the raw section.
//
// If no charset can be determined, or the charset is unknown, the
// remaining bytes are appended undecoded as latin-1.
static void decodeRFC2231Value( Codec* & rfc2231Codec,
                                TQTextCodec* & textcodec,
                                bool isContinuation, TQString & value,
                                TQPair<const char*,int> & source ) {
  const char * const rawBegin = source.first;
  const char * const rawEnd = rawBegin + source.second;
  const char * decCursor = rawBegin;

  if ( !isContinuation ) {
    //
    // split off "charset'language'":
    //

    // advance to the first single quote:
    for ( ; decCursor != rawEnd && *decCursor != '\'' ; ++decCursor ) { }

    if ( decCursor == rawEnd ) {
      // not a single quote in sight: treat everything as latin-1 text.
      KMIME_WARN << "No charset in extended-initial-value. "
        "Assuming \"iso-8859-1\"." << endl;
      value += TQString::fromLatin1( rawBegin, source.second );
      return;
    }

    // the length argument includes room for the terminating NUL:
    TQCString charset( rawBegin, decCursor - rawBegin + 1 );

    // step over the quote and look for the one ending the language tag
    // (the tag itself is ignored):
    const char * const languageStart = ++decCursor;
    for ( ; decCursor != rawEnd && *decCursor != '\'' ; ++decCursor ) { }

    if ( decCursor != rawEnd ) {
      ++decCursor; // step over the second quote
    } else {
      KMIME_WARN << "No language in extended-initial-value. "
        "Trying to recover." << endl;
      decCursor = languageStart;
    }
    // decCursor now sits on the first byte of the encoded text.

    //
    // look up the charset codec:
    //
    bool matchOK = false;
    textcodec = KGlobal::charsets()->codecForName( charset, matchOK );
    if ( !matchOK ) {
      textcodec = 0;
      KMIME_WARN_UNKNOWN(Charset,charset);
    }
  }

  if ( !rfc2231Codec ) {
    rfc2231Codec = Codec::codecForName("x-kmime-rfc2231");
    assert( rfc2231Codec );
  }

  if ( !textcodec ) {
    // no usable charset: pass the bytes through as latin-1.
    value += TQString::fromLatin1( decCursor, rawEnd - decCursor );
    return;
  }

  //
  // run the transfer decoder, then convert the bytes to unicode:
  //
  Decoder * dec = rfc2231Codec->makeDecoder();
  assert( dec );

  TQByteArray buffer( rfc2231Codec->maxDecodedSizeFor( rawEnd - decCursor ) );
  TQByteArray::Iterator bit = buffer.begin();
  TQByteArray::ConstIterator bend = buffer.end();

  const bool decodedCompletely = dec->decode( decCursor, rawEnd, bit, bend );
  if ( !decodedCompletely )
    KMIME_WARN << rfc2231Codec->name()
               << " codec lies about it's maxDecodedSizeFor()\n"
      "result may be truncated" << endl;

  value += textcodec->toUnicode( buffer.begin(), bit - buffer.begin() );

  kdDebug() << "value now: \"" << value << "\"" << endl;

  delete dec;
}
|
|
|
|
// known issues:
|
|
// - permutes rfc2231 continuations when the total number of parts
|
|
// exceeds 10 (other-sections then becomes *xy, ie. two digits)
|
|
|
|
bool parseParameterList( const char* & scursor, const char * const send,
|
|
TQMap<TQString,TQString> & result, bool isCRLF ) {
|
|
// parse the list into raw attribute-value pairs:
|
|
TQMap<TQString,TQStringOrTQPair> rawParameterList;
|
|
if (!parseRawParameterList( scursor, send, rawParameterList, isCRLF ) )
|
|
return false;
|
|
|
|
if ( rawParameterList.isEmpty() ) return true;
|
|
|
|
// decode rfc 2231 continuations and alternate charset encoding:
|
|
|
|
// NOTE: this code assumes that what TQMapIterator delivers is sorted
|
|
// by the key!
|
|
|
|
Codec * rfc2231Codec = 0;
|
|
TQTextCodec * textcodec = 0;
|
|
TQString attribute;
|
|
TQString value;
|
|
enum Modes { NoMode = 0x0, Continued = 0x1, Encoded = 0x2 } mode;
|
|
|
|
TQMapIterator<TQString,TQStringOrTQPair> it, end = rawParameterList.end();
|
|
|
|
for ( it = rawParameterList.begin() ; it != end ; ++it ) {
|
|
if ( attribute.isNull() || !it.key().startsWith( attribute ) ) {
|
|
//
|
|
// new attribute:
|
|
//
|
|
|
|
// store the last attribute/value pair in the result map now:
|
|
if ( !attribute.isNull() ) result.insert( attribute, value );
|
|
// and extract the information from the new raw attribute:
|
|
value = TQString();
|
|
attribute = it.key();
|
|
mode = NoMode;
|
|
// is the value encoded?
|
|
if ( attribute.endsWith( asterisk ) ) {
|
|
attribute.truncate( attribute.length() - 1 );
|
|
mode = (Modes) ((int) mode | Encoded);
|
|
}
|
|
// is the value continued?
|
|
if ( attribute.endsWith( asteriskZero ) ) {
|
|
attribute.truncate( attribute.length() - 2 );
|
|
mode = (Modes) ((int) mode | Continued);
|
|
}
|
|
//
|
|
// decode if necessary:
|
|
//
|
|
if ( mode & Encoded ) {
|
|
decodeRFC2231Value( rfc2231Codec, textcodec,
|
|
false, /* isn't continuation */
|
|
value, (*it).qpair );
|
|
} else {
|
|
// not encoded.
|
|
if ( (*it).qpair.first )
|
|
value += TQString::fromLatin1( (*it).qpair.first, (*it).qpair.second );
|
|
else
|
|
value += (*it).qstring;
|
|
}
|
|
|
|
//
|
|
// shortcut-processing when the value isn't encoded:
|
|
//
|
|
|
|
if ( !(mode & Continued) ) {
|
|
// save result already:
|
|
result.insert( attribute, value );
|
|
// force begin of a new attribute:
|
|
attribute = TQString();
|
|
}
|
|
} else /* it.key().startsWith( attribute ) */ {
|
|
//
|
|
// continuation
|
|
//
|
|
|
|
// ignore the section and trust TQMap to have sorted the keys:
|
|
if ( it.key().endsWith( asterisk ) ) {
|
|
// encoded
|
|
decodeRFC2231Value( rfc2231Codec, textcodec,
|
|
true, /* is continuation */
|
|
value, (*it).qpair );
|
|
} else {
|
|
// not encoded
|
|
if ( (*it).qpair.first )
|
|
value += TQString::fromLatin1( (*it).qpair.first, (*it).qpair.second );
|
|
else
|
|
value += (*it).qstring;
|
|
}
|
|
}
|
|
}
|
|
|
|
// write last attr/value pair:
|
|
if ( !attribute.isNull() )
|
|
result.insert( attribute, value );
|
|
|
|
return true;
|
|
}
|
|
|
|
// Three-letter English day-of-week abbreviations as used in date
// headers, starting with Sunday.
static const char * stdDayNames[] = {
  "Sun",
  "Mon",
  "Tue",
  "Wed",
  "Thu",
  "Fri",
  "Sat"
};
// number of entries in stdDayNames:
static const int stdDayNamesLen = sizeof( stdDayNames ) / sizeof( stdDayNames[0] );
|
|
|
|
static bool parseDayName( const char* & scursor, const char * const send )
|
|
{
|
|
// check bounds:
|
|
if ( send - scursor < 3 ) return false;
|
|
|
|
for ( int i = 0 ; i < stdDayNamesLen ; ++i )
|
|
if ( qstrnicmp( scursor, stdDayNames[i], 3 ) == 0 ) {
|
|
scursor += 3;
|
|
kdDebug() << "found " << stdDayNames[i] << endl;
|
|
return true;
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
|
|
// Three-letter English month abbreviations as used in date headers
// (RFC 2822 month-name).  The array index is the zero-based month number
// that parseMonthName() reports.
//
// The last entry must be "Dec": a "Dez" entry (the German
// abbreviation) makes every December date fail to parse.
static const char * stdMonthNames[] = {
  "Jan", "Feb", "Mar", "Apr", "May", "Jun",
  "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
};
// number of entries in stdMonthNames:
static const int stdMonthNamesLen =
  sizeof stdMonthNames / sizeof *stdMonthNames;
|
|
|
|
static bool parseMonthName( const char* & scursor, const char * const send,
|
|
int & result )
|
|
{
|
|
// check bounds:
|
|
if ( send - scursor < 3 ) return false;
|
|
|
|
for ( result = 0 ; result < stdMonthNamesLen ; ++result )
|
|
if ( qstrnicmp( scursor, stdMonthNames[result], 3 ) == 0 ) {
|
|
scursor += 3;
|
|
return true;
|
|
}
|
|
|
|
// not found:
|
|
return false;
|
|
}
|
|
|
|
// Alphabetic time zone designators understood by
// parseAlphaNumericTimeZone(), with their offset from GMT in seconds
// (positive values are east of GMT).
//
// The entry for -5h following "EST" must be "CDT" (Central Daylight
// Time).  Listing "MST" there as well as at -7h makes CDT unknown and
// shadows the correct -7h offset for MST, since the table is searched
// from the top.
//
// NOTE(review): the single-letter military zones carry the offsets given
// by RFC 822.  RFC 2822 (section 4.3) recommends treating them as an
// unknown zone instead; they are left unchanged here.
static const struct {
  const char * tzName;
  long int secsEastOfGMT;
} timeZones[] = {
  // rfc 822 timezones:
  { "GMT", 0 },
  { "UT", 0 },
  { "EDT", -4*3600 },
  { "EST", -5*3600 },
  { "CDT", -5*3600 },
  { "CST", -6*3600 },
  { "MDT", -6*3600 },
  { "MST", -7*3600 },
  { "PDT", -7*3600 },
  { "PST", -8*3600 },
  // common, non-rfc-822 zones:
  { "CET", 1*3600 },
  { "MET", 1*3600 },
  { "UTC", 0 },
  { "CEST", 2*3600 },
  { "BST", 1*3600 },
  // rfc 822 military timezones:
  { "Z", 0 },
  { "A", -1*3600 },
  { "B", -2*3600 },
  { "C", -3*3600 },
  { "D", -4*3600 },
  { "E", -5*3600 },
  { "F", -6*3600 },
  { "G", -7*3600 },
  { "H", -8*3600 },
  { "I", -9*3600 },
  // J is not used!
  { "K", -10*3600 },
  { "L", -11*3600 },
  { "M", -12*3600 },
  { "N", 1*3600 },
  { "O", 2*3600 },
  { "P", 3*3600 },
  { "Q", 4*3600 },
  { "R", 5*3600 },
  { "S", 6*3600 },
  { "T", 7*3600 },
  { "U", 8*3600 },
  { "V", 9*3600 },
  { "W", 10*3600 },
  { "X", 11*3600 },
  { "Y", 12*3600 },
};
// number of entries in timeZones:
static const int timeZonesLen = sizeof timeZones / sizeof *timeZones;
|
|
|
|
static bool parseAlphaNumericTimeZone( const char* & scursor,
|
|
const char * const send,
|
|
long int & secsEastOfGMT,
|
|
bool & timeZoneKnown )
|
|
{
|
|
TQPair<const char*,int> maybeTimeZone(0,0);
|
|
if ( !parseToken( scursor, send, maybeTimeZone, false /*no 8bit*/ ) )
|
|
return false;
|
|
for ( int i = 0 ; i < timeZonesLen ; ++i )
|
|
if ( qstrnicmp( timeZones[i].tzName,
|
|
maybeTimeZone.first, maybeTimeZone.second ) == 0 ) {
|
|
scursor += maybeTimeZone.second;
|
|
secsEastOfGMT = timeZones[i].secsEastOfGMT;
|
|
timeZoneKnown = true;
|
|
return true;
|
|
}
|
|
|
|
// don't choke just because we don't happen to know the time zone
|
|
KMIME_WARN_UNKNOWN(time zone,TQCString( maybeTimeZone.first, maybeTimeZone.second+1 ));
|
|
secsEastOfGMT = 0;
|
|
timeZoneKnown = false;
|
|
return true;
|
|
}
|
|
|
|
// Parses a run of decimal digits starting at scursor (stopping at send
// or at the first non-digit) and returns the number of digits consumed;
// 0 means there was no digit at all.
//
// result receives the parsed number (0 if no digit was found).  If the
// digits describe a number too large for an int, result saturates at
// the largest int value instead of overflowing; the returned digit
// count still covers all digits consumed.
static int parseDigits( const char* & scursor, const char * const send,
                        int & result )
{
  // largest value an int can hold:
  const int maxInt = int( ~0u >> 1 );

  result = 0;
  int digits = 0;
  // isdigit() must not be handed a negative value, which a plain char
  // holding an 8-bit character can be; hence the cast.
  for ( ; scursor != send && isdigit( static_cast<unsigned char>( *scursor ) ) ;
        ++scursor, ++digits ) {
    const int digit = *scursor - '0';
    if ( result > ( maxInt - digit ) / 10 )
      result = maxInt; // would overflow: clamp
    else
      result = result * 10 + digit;
  }
  return digits;
}
|
|
|
|
static bool parseTimeOfDay( const char* & scursor, const char * const send,
|
|
int & hour, int & min, int & sec, bool isCRLF=false )
|
|
{
|
|
// time-of-day := 2DIGIT [CFWS] ":" [CFWS] 2DIGIT [ [CFWS] ":" 2DIGIT ]
|
|
|
|
//
|
|
// 2DIGIT representing "hour":
|
|
//
|
|
if ( !parseDigits( scursor, send, hour ) ) return false;
|
|
|
|
eatCFWS( scursor, send, isCRLF );
|
|
if ( scursor == send || *scursor != ':' ) return false;
|
|
scursor++; // eat ':'
|
|
|
|
eatCFWS( scursor, send, isCRLF );
|
|
if ( scursor == send ) return false;
|
|
|
|
//
|
|
// 2DIGIT representing "minute":
|
|
//
|
|
if ( !parseDigits( scursor, send, min ) ) return false;
|
|
|
|
eatCFWS( scursor, send, isCRLF );
|
|
if ( scursor == send ) return true; // seconds are optional
|
|
|
|
//
|
|
// let's see if we have a 2DIGIT representing "second":
|
|
//
|
|
if ( *scursor == ':' ) {
|
|
// yepp, there are seconds:
|
|
scursor++; // eat ':'
|
|
eatCFWS( scursor, send, isCRLF );
|
|
if ( scursor == send ) return false;
|
|
|
|
if ( !parseDigits( scursor, send, sec ) ) return false;
|
|
} else {
|
|
sec = 0;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
|
|
bool parseTime( const char* & scursor, const char * send,
|
|
int & hour, int & min, int & sec, long int & secsEastOfGMT,
|
|
bool & timeZoneKnown, bool isCRLF )
|
|
{
|
|
// time := time-of-day CFWS ( zone / obs-zone )
|
|
//
|
|
// obs-zone := "UT" / "GMT" /
|
|
// "EST" / "EDT" / ; -0500 / -0400
|
|
// "CST" / "CDT" / ; -0600 / -0500
|
|
// "MST" / "MDT" / ; -0700 / -0600
|
|
// "PST" / "PDT" / ; -0800 / -0700
|
|
// "A"-"I" / "a"-"i" /
|
|
// "K"-"Z" / "k"-"z"
|
|
|
|
eatCFWS( scursor, send, isCRLF );
|
|
if ( scursor == send ) return false;
|
|
|
|
if ( !parseTimeOfDay( scursor, send, hour, min, sec, isCRLF ) )
|
|
return false;
|
|
|
|
eatCFWS( scursor, send, isCRLF );
|
|
if ( scursor == send ) {
|
|
timeZoneKnown = false;
|
|
secsEastOfGMT = 0;
|
|
return true; // allow missing timezone
|
|
}
|
|
|
|
timeZoneKnown = true;
|
|
if ( *scursor == '+' || *scursor == '-' ) {
|
|
// remember and eat '-'/'+':
|
|
const char sign = *scursor++;
|
|
// numerical timezone:
|
|
int maybeTimeZone;
|
|
if ( parseDigits( scursor, send, maybeTimeZone ) != 4 ) return false;
|
|
secsEastOfGMT = 60 * ( maybeTimeZone / 100 * 60 + maybeTimeZone % 100 );
|
|
if ( sign == '-' ) {
|
|
secsEastOfGMT *= -1;
|
|
if ( secsEastOfGMT == 0 )
|
|
timeZoneKnown = false; // -0000 means indetermined tz
|
|
}
|
|
} else {
|
|
// maybe alphanumeric timezone:
|
|
if ( !parseAlphaNumericTimeZone( scursor, send, secsEastOfGMT, timeZoneKnown ) )
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
|
|
bool parseDateTime( const char* & scursor, const char * const send,
|
|
Types::DateTime & result, bool isCRLF )
|
|
{
|
|
// Parsing date-time; strict mode:
|
|
//
|
|
// date-time := [ [CFWS] day-name [CFWS] "," ] ; wday
|
|
// (expanded) [CFWS] 1*2DIGIT CFWS month-name CFWS 2*DIGIT [CFWS] ; date
|
|
// time
|
|
//
|
|
// day-name := "Mon" / "Tue" / "Wed" / "Thu" / "Fri" / "Sat" / "Sun"
|
|
// month-name := "Jan" / "Feb" / "Mar" / "Apr" / "May" / "Jun" /
|
|
// "Jul" / "Aug" / "Sep" / "Oct" / "Nov" / "Dez"
|
|
|
|
struct tm maybeDateTime = {
|
|
#ifdef HAVE_TM_GMTOFF
|
|
0, 0, // initializers for members tm_gmtoff and tm_zone
|
|
#endif
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0
|
|
};
|
|
|
|
eatCFWS( scursor, send, isCRLF );
|
|
if ( scursor == send ) return false;
|
|
|
|
//
|
|
// let's see if there's a day-of-week:
|
|
//
|
|
if ( parseDayName( scursor, send ) ) {
|
|
eatCFWS( scursor, send, isCRLF );
|
|
if ( scursor == send ) return false;
|
|
// day-name should be followed by ',' but we treat it as optional:
|
|
if ( *scursor == ',' ) {
|
|
scursor++; // eat ','
|
|
eatCFWS( scursor, send, isCRLF );
|
|
}
|
|
}
|
|
|
|
//
|
|
// 1*2DIGIT representing "day" (of month):
|
|
//
|
|
int maybeDay;
|
|
if ( !parseDigits( scursor, send, maybeDay ) ) return false;
|
|
|
|
eatCFWS( scursor, send, isCRLF );
|
|
if ( scursor == send ) return false;
|
|
|
|
// success: store maybeDay in maybeDateTime:
|
|
maybeDateTime.tm_mday = maybeDay;
|
|
|
|
//
|
|
// month-name:
|
|
//
|
|
int maybeMonth = 0;
|
|
if ( !parseMonthName( scursor, send, maybeMonth ) ) return false;
|
|
if ( scursor == send ) return false;
|
|
assert( maybeMonth >= 0 ); assert( maybeMonth <= 11 );
|
|
|
|
eatCFWS( scursor, send, isCRLF );
|
|
if ( scursor == send ) return false;
|
|
|
|
// success: store maybeMonth in maybeDateTime:
|
|
maybeDateTime.tm_mon = maybeMonth;
|
|
|
|
//
|
|
// 2*DIGIT representing "year":
|
|
//
|
|
int maybeYear;
|
|
if ( !parseDigits( scursor, send, maybeYear ) ) return false;
|
|
// RFC 2822 4.3 processing:
|
|
if ( maybeYear < 50 )
|
|
maybeYear += 2000;
|
|
else if ( maybeYear < 1000 )
|
|
maybeYear += 1900;
|
|
// else keep as is
|
|
if ( maybeYear < 1900 ) return false; // rfc2822, 3.3
|
|
|
|
eatCFWS( scursor, send, isCRLF );
|
|
if ( scursor == send ) return false;
|
|
|
|
// success: store maybeYear in maybeDateTime:
|
|
maybeDateTime.tm_year = maybeYear - 1900;
|
|
|
|
//
|
|
// time
|
|
//
|
|
int maybeHour, maybeMinute, maybeSecond;
|
|
long int secsEastOfGMT;
|
|
bool timeZoneKnown = true;
|
|
|
|
if ( !parseTime( scursor, send,
|
|
maybeHour, maybeMinute, maybeSecond,
|
|
secsEastOfGMT, timeZoneKnown, isCRLF ) )
|
|
return false;
|
|
|
|
// success: store everything in maybeDateTime:
|
|
maybeDateTime.tm_hour = maybeHour;
|
|
maybeDateTime.tm_min = maybeMinute;
|
|
maybeDateTime.tm_sec = maybeSecond;
|
|
maybeDateTime.tm_isdst = DateFormatter::isDaylight();
|
|
// now put everything together and check if mktime(3) likes it:
|
|
result.time = mktime( &maybeDateTime );
|
|
if ( result.time == (time_t)(-1) ) return false;
|
|
|
|
// adjust to UTC/GMT:
|
|
//result.time -= secsEastOfGMT;
|
|
result.secsEastOfGMT = secsEastOfGMT;
|
|
result.timeZoneKnown = timeZoneKnown;
|
|
|
|
return true;
|
|
}
|
|
|
|
// Disabled stub: the "#if 0" keeps this function out of the build, and
// its body unconditionally returns false without looking at its
// arguments.  Judging by the name it is presumably a placeholder for a
// more lenient date parser - TODO confirm before enabling.
#if 0
|
|
bool tryToMakeAnySenseOfDateString( const char* & scursor,
|
|
const char * const send,
|
|
time_t & result, bool isCRLF )
|
|
{
|
|
return false;
|
|
}
|
|
#endif
|
|
|
|
} // namespace HeaderParsing
|
|
|
|
} // namespace KMime
|