You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
669 lines
16 KiB
669 lines
16 KiB
/**********************************************************************
|
|
*
|
|
* rfcdecoder.cc - handler for various rfc/mime encodings
|
|
* Copyright (C) 2000 s.carstens@gmx.de
|
|
*
|
|
* This program is free software; you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License as published by
|
|
* the Free Software Foundation; either version 2 of the License, or
|
|
* (at your option) any later version.
|
|
*
|
|
* This program is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with this program; if not, write to the Free Software
|
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
|
*
|
|
* Send comments and bug fixes to s.carstens@gmx.de
|
|
*
|
|
*********************************************************************/
|
|
#include "rfcdecoder.h"
|
|
|
|
#include <ctype.h>
|
|
#include <sys/types.h>
|
|
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
|
|
#include <tqtextcodec.h>
|
|
#include <tqbuffer.h>
|
|
#include <tqregexp.h>
|
|
#include <kmdcodec.h>
|
|
|
|
// This part taken from rfc 2192 IMAP URL Scheme. C. Newman. September 1997.
|
|
// adapted to QT-Toolkit by Sven Carstens <s.carstens@gmx.de> 2000
|
|
|
|
/* Modified base64 alphabet used by the modified UTF-7 conversions in this
 * file: the usual base64 characters with ',' taking the place of '/'.
 * The alphabet has exactly 64 characters (indices 0..63); the array itself
 * additionally holds the terminating NUL of the string literal.
 */
static unsigned char base64chars[] =
  "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,";
/* value stored in a reverse lookup table for bytes outside the alphabet;
   64 is the first value that is not a valid 6-bit digit */
#define UNDEFINED 64
/* NOTE(review): MAXLINE is not referenced by the code visible in this file */
#define MAXLINE  76

/* UTF16 definitions */
/* low 10 bits carried by each half of a surrogate pair */
#define UTF16MASK       0x03FFUL
/* number of bits the high surrogate payload is shifted by */
#define UTF16SHIFT      10
/* first code point that needs a surrogate pair */
#define UTF16BASE       0x10000UL
/* range of high (leading) surrogates */
#define UTF16HIGHSTART  0xD800UL
#define UTF16HIGHEND    0xDBFFUL
/* range of low (trailing) surrogates */
#define UTF16LOSTART    0xDC00UL
#define UTF16LOEND      0xDFFFUL
|
|
|
|
/* Convert an IMAP mailbox to a Unicode path
|
|
*/
|
|
TQString rfcDecoder::fromIMAP (const TQString & inSrc)
|
|
{
|
|
unsigned char c, i, bitcount;
|
|
unsigned long ucs4, utf16, bitbuf;
|
|
unsigned char base64[256], utf8[6];
|
|
unsigned long srcPtr = 0;
|
|
TQCString dst;
|
|
TQCString src = inSrc.ascii ();
|
|
uint srcLen = inSrc.length();
|
|
|
|
/* initialize modified base64 decoding table */
|
|
memset (base64, UNDEFINED, sizeof (base64));
|
|
for (i = 0; i < sizeof (base64chars); ++i)
|
|
{
|
|
base64[(int)base64chars[i]] = i;
|
|
}
|
|
|
|
/* loop until end of string */
|
|
while (srcPtr < srcLen)
|
|
{
|
|
c = src[srcPtr++];
|
|
/* deal with literal characters and &- */
|
|
if (c != '&' || src[srcPtr] == '-')
|
|
{
|
|
/* encode literally */
|
|
dst += c;
|
|
/* skip over the '-' if this is an &- sequence */
|
|
if (c == '&')
|
|
srcPtr++;
|
|
}
|
|
else
|
|
{
|
|
/* convert modified UTF-7 -> UTF-16 -> UCS-4 -> UTF-8 -> HEX */
|
|
bitbuf = 0;
|
|
bitcount = 0;
|
|
ucs4 = 0;
|
|
while ((c = base64[(unsigned char) src[srcPtr]]) != UNDEFINED)
|
|
{
|
|
++srcPtr;
|
|
bitbuf = (bitbuf << 6) | c;
|
|
bitcount += 6;
|
|
/* enough bits for a UTF-16 character? */
|
|
if (bitcount >= 16)
|
|
{
|
|
bitcount -= 16;
|
|
utf16 = (bitcount ? bitbuf >> bitcount : bitbuf) & 0xffff;
|
|
/* convert UTF16 to UCS4 */
|
|
if (utf16 >= UTF16HIGHSTART && utf16 <= UTF16HIGHEND)
|
|
{
|
|
ucs4 = (utf16 - UTF16HIGHSTART) << UTF16SHIFT;
|
|
continue;
|
|
}
|
|
else if (utf16 >= UTF16LOSTART && utf16 <= UTF16LOEND)
|
|
{
|
|
ucs4 += utf16 - UTF16LOSTART + UTF16BASE;
|
|
}
|
|
else
|
|
{
|
|
ucs4 = utf16;
|
|
}
|
|
/* convert UTF-16 range of UCS4 to UTF-8 */
|
|
if (ucs4 <= 0x7fUL)
|
|
{
|
|
utf8[0] = ucs4;
|
|
i = 1;
|
|
}
|
|
else if (ucs4 <= 0x7ffUL)
|
|
{
|
|
utf8[0] = 0xc0 | (ucs4 >> 6);
|
|
utf8[1] = 0x80 | (ucs4 & 0x3f);
|
|
i = 2;
|
|
}
|
|
else if (ucs4 <= 0xffffUL)
|
|
{
|
|
utf8[0] = 0xe0 | (ucs4 >> 12);
|
|
utf8[1] = 0x80 | ((ucs4 >> 6) & 0x3f);
|
|
utf8[2] = 0x80 | (ucs4 & 0x3f);
|
|
i = 3;
|
|
}
|
|
else
|
|
{
|
|
utf8[0] = 0xf0 | (ucs4 >> 18);
|
|
utf8[1] = 0x80 | ((ucs4 >> 12) & 0x3f);
|
|
utf8[2] = 0x80 | ((ucs4 >> 6) & 0x3f);
|
|
utf8[3] = 0x80 | (ucs4 & 0x3f);
|
|
i = 4;
|
|
}
|
|
/* copy it */
|
|
for (c = 0; c < i; ++c)
|
|
{
|
|
dst += utf8[c];
|
|
}
|
|
}
|
|
}
|
|
/* skip over trailing '-' in modified UTF-7 encoding */
|
|
if (src[srcPtr] == '-')
|
|
++srcPtr;
|
|
}
|
|
}
|
|
return TQString::fromUtf8 (dst.data ());
|
|
}
|
|
|
|
/* replace " with \" and \ with \\ " and \ characters */
|
|
TQString rfcDecoder::quoteIMAP(const TQString &src)
|
|
{
|
|
uint len = src.length();
|
|
TQString result;
|
|
result.reserve(2 * len);
|
|
for (unsigned int i = 0; i < len; i++)
|
|
{
|
|
if (src[i] == '"' || src[i] == '\\')
|
|
result += '\\';
|
|
result += src[i];
|
|
}
|
|
//result.squeeze(); - unnecessary and slow
|
|
return result;
|
|
}
|
|
|
|
/* Convert Unicode path to modified UTF-7 IMAP mailbox
|
|
*/
|
|
TQString rfcDecoder::toIMAP (const TQString & inSrc)
|
|
{
|
|
unsigned int utf8pos, utf8total, c, utf7mode, bitstogo, utf16flag;
|
|
unsigned long ucs4, bitbuf;
|
|
TQCString src = inSrc.utf8 ();
|
|
TQString dst;
|
|
|
|
ulong srcPtr = 0;
|
|
utf7mode = 0;
|
|
utf8total = 0;
|
|
bitstogo = 0;
|
|
utf8pos = 0;
|
|
bitbuf = 0;
|
|
ucs4 = 0;
|
|
while (srcPtr < src.length ())
|
|
{
|
|
c = (unsigned char) src[srcPtr++];
|
|
/* normal character? */
|
|
if (c >= ' ' && c <= '~')
|
|
{
|
|
/* switch out of UTF-7 mode */
|
|
if (utf7mode)
|
|
{
|
|
if (bitstogo)
|
|
{
|
|
dst += base64chars[(bitbuf << (6 - bitstogo)) & 0x3F];
|
|
bitstogo = 0;
|
|
}
|
|
dst += '-';
|
|
utf7mode = 0;
|
|
}
|
|
dst += c;
|
|
/* encode '&' as '&-' */
|
|
if (c == '&')
|
|
{
|
|
dst += '-';
|
|
}
|
|
continue;
|
|
}
|
|
/* switch to UTF-7 mode */
|
|
if (!utf7mode)
|
|
{
|
|
dst += '&';
|
|
utf7mode = 1;
|
|
}
|
|
/* Encode US-ASCII characters as themselves */
|
|
if (c < 0x80)
|
|
{
|
|
ucs4 = c;
|
|
utf8total = 1;
|
|
}
|
|
else if (utf8total)
|
|
{
|
|
/* save UTF8 bits into UCS4 */
|
|
ucs4 = (ucs4 << 6) | (c & 0x3FUL);
|
|
if (++utf8pos < utf8total)
|
|
{
|
|
continue;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
utf8pos = 1;
|
|
if (c < 0xE0)
|
|
{
|
|
utf8total = 2;
|
|
ucs4 = c & 0x1F;
|
|
}
|
|
else if (c < 0xF0)
|
|
{
|
|
utf8total = 3;
|
|
ucs4 = c & 0x0F;
|
|
}
|
|
else
|
|
{
|
|
/* NOTE: can't convert UTF8 sequences longer than 4 */
|
|
utf8total = 4;
|
|
ucs4 = c & 0x03;
|
|
}
|
|
continue;
|
|
}
|
|
/* loop to split ucs4 into two utf16 chars if necessary */
|
|
utf8total = 0;
|
|
do
|
|
{
|
|
if (ucs4 >= UTF16BASE)
|
|
{
|
|
ucs4 -= UTF16BASE;
|
|
bitbuf = (bitbuf << 16) | ((ucs4 >> UTF16SHIFT) + UTF16HIGHSTART);
|
|
ucs4 = (ucs4 & UTF16MASK) + UTF16LOSTART;
|
|
utf16flag = 1;
|
|
}
|
|
else
|
|
{
|
|
bitbuf = (bitbuf << 16) | ucs4;
|
|
utf16flag = 0;
|
|
}
|
|
bitstogo += 16;
|
|
/* spew out base64 */
|
|
while (bitstogo >= 6)
|
|
{
|
|
bitstogo -= 6;
|
|
dst += base64chars[(bitstogo ? (bitbuf >> bitstogo) : bitbuf) & 0x3F];
|
|
}
|
|
}
|
|
while (utf16flag);
|
|
}
|
|
/* if in UTF-7 mode, finish in ASCII */
|
|
if (utf7mode)
|
|
{
|
|
if (bitstogo)
|
|
{
|
|
dst += base64chars[(bitbuf << (6 - bitstogo)) & 0x3F];
|
|
}
|
|
dst += '-';
|
|
}
|
|
return quoteIMAP(dst);
|
|
}
|
|
|
|
//-----------------------------------------------------------------------------
|
|
/* Remove backslash quoting: a backslash is dropped and the character that
 * follows it is copied literally (so "\\" yields one backslash and "\""
 * yields a double quote).  A backslash that is the very last character has
 * nothing to escape and is kept as it is instead of reading past the end of
 * the string.
 */
TQString rfcDecoder::decodeQuoting(const TQString &aStr)
{
  TQString result;
  const unsigned int strLength = aStr.length();
  for (unsigned int i = 0; i < strLength; i++)
  {
    // skip the escaping backslash, but only if an escaped character follows
    if (aStr[i] == '\\' && i + 1 < strLength)
      i++;
    result += aStr[i];
  }
  return result;
}
|
|
|
|
//-----------------------------------------------------------------------------
|
|
/* Look up the text codec for a charset name.  The name is lower-cased and
 * "windows" is replaced by "cp" before it is handed to
 * TQTextCodec::codecForName().  An empty name yields NULL.
 */
TQTextCodec *
rfcDecoder::codecForName (const TQString & _str)
{
  if (!_str.isEmpty ())
  {
    TQString codecName = _str.lower ();
    codecName.replace ("windows", "cp");
    return TQTextCodec::codecForName (codecName.latin1 ());
  }
  return NULL;
}
|
|
|
|
//-----------------------------------------------------------------------------
|
|
/* Convenience overload: decode _str and discard the charset that the
 * two-argument overload reports.
 */
const TQString
rfcDecoder::decodeRFC2047String (const TQString & _str)
{
  // scratch string that receives the charset nobody asked for
  TQString ignoredCharset;
  const TQString decoded = decodeRFC2047String (_str, ignoredCharset);
  return decoded;
}
|
|
|
|
//-----------------------------------------------------------------------------
|
|
/* Convenience overload: decode _str, report the charset through `charset`
 * and discard the language that the three-argument overload reports.
 */
const TQString
rfcDecoder::decodeRFC2047String (const TQString & _str, TQString & charset)
{
  // scratch string that receives the language nobody asked for
  TQString ignoredLanguage;
  const TQString decoded = decodeRFC2047String (_str, charset, ignoredLanguage);
  return decoded;
}
|
|
|
|
//-----------------------------------------------------------------------------
|
|
/* Decode the "=?charset?encoding?text?=" encoded words contained in _str.
 *
 * Text outside of encoded words is copied unchanged.  For every encoded word
 * the charset (and, after a '*', the language) is stored in the output
 * parameters and the text is decoded as quoted-printable ('Q', with '_'
 * meaning space) or base64 ('B').  Anything that does not parse as an
 * encoded word within maxLen characters is copied literally.
 *
 * After the whole string has been processed, the collected bytes are
 * converted to Unicode with the codec of the charset seen last, if a codec
 * for it can be found; otherwise the bytes are returned as they are.
 *
 * Note that `charset`/`language` are written as soon as a charset name has
 * been parsed, even if the rest of that encoded word turns out to be invalid.
 */
const TQString
rfcDecoder::decodeRFC2047String (const TQString & _str, TQString & charset,
                                 TQString & language)
{
  //do we have a rfc string
  if (_str.find("=?") < 0)
    return _str;

  TQCString aStr = _str.ascii ();  // TQString.length() means Unicode chars
  TQCString result;
  char *pos, *beg, *end, *mid = NULL;
  TQCString str;
  char encoding = 0;
  bool valid;
  const int maxLen = 200;
  int i;

  for (pos = aStr.data (); *pos; pos++)
  {
    if (pos[0] != '=' || pos[1] != '?')
    {
      result += *pos;
      continue;
    }
    beg = pos + 2;
    end = beg;
    valid = TRUE;
    // parse charset name; the <ctype.h> functions must not be handed a
    // negative char, hence the casts
    for (i = 2, pos += 2;
         i < maxLen && (*pos != '?' && (ispunct ((unsigned char) *pos)
                                        || isalnum ((unsigned char) *pos)));
         i++)
      pos++;
    if (*pos != '?' || i < 4 || i >= maxLen)
      valid = FALSE;
    else
    {
      charset = TQCString (beg, i - 1);  // -2 + 1 for the zero
      int pt = charset.findRev('*');
      if (pt != -1)
      {
        // save language for later usage
        language = charset.right (charset.length () - pt - 1);

        // tie off language as defined in rfc2047
        charset.truncate(pt);
      }
      // get encoding and check delimiting question marks.  The encoding is
      // tested first so that pos[2] is never read when pos[1] already is the
      // end of the string.
      encoding = toupper ((unsigned char) pos[1]);
      if ((encoding != 'Q' && encoding != 'B') || pos[2] != '?')
        valid = FALSE;
      else
      {
        pos += 3;
        i += 3;
      }
    }
    if (valid)
    {
      mid = pos;
      // search for end of encoded part
      while (i < maxLen && *pos && !(*pos == '?' && *(pos + 1) == '='))
      {
        i++;
        pos++;
      }
      if (i >= maxLen || !*pos)
        valid = FALSE;
      else
        end = pos + 2;  //end now points to the first char after the encoded string
    }
    if (valid)
    {
      // copy the encoded text [mid, pos); the size argument counts the
      // terminating zero as well
      str = TQCString (mid, (uint) (pos - mid) + 1);
      if (encoding == 'Q')
      {
        // decode quoted printable text; '_' stands for a space
        for (i = str.length () - 1; i >= 0; i--)
          if (str[i] == '_')
            str[i] = ' ';
        str = KCodecs::quotedPrintableDecode(str);
      }
      else
      {
        // decode base64 text
        str = KCodecs::base64Decode(str);
      }
      int len = str.length();
      for (i = 0; i < len; i++)
        result += (char) (TQChar) str[i];

      pos = end - 1;  // the loop increment moves on to the char after "?="
    }
    else
    {
      // not a well-formed encoded word: copy the "=?" literally and resume
      // scanning right behind it
      pos = beg - 2;
      result += *pos++;
      result += *pos;
    }
  }
  if (!charset.isEmpty ())
  {
    TQTextCodec *aCodec = codecForName (charset.ascii ());
    if (aCodec)
    {
      return aCodec->toUnicode (result);
    }
  }
  return result;
}
|
|
|
|
|
|
//-----------------------------------------------------------------------------
|
|
/* Characters the encoders in this file always escape: 16 characters,
   followed by the terminating NUL of the string literal. */
const char especials[] = "()<>@,;:\"/[]?.= ";
|
|
|
|
/* Encode the 8-bit parts of _str as "=?iso-8859-1?q?...?=" encoded words.
 *
 * The string is processed in its latin1() form.  Text without 8-bit
 * characters is copied unchanged.  When an 8-bit character is found, an
 * encoded word is started at the beginning of the word containing it (the
 * character after the last space) and extended until the length budget is
 * used up, a '<' is reached, or the string ends; long runs are preferably
 * split at a space.  Inside an encoded word every 8-bit character, every
 * character listed in `especials` and the underscore are written as "=XX"
 * (upper-case hex).  The underscore has to be escaped because it denotes a
 * space in the Q encoding and would otherwise decode to one.
 */
const TQString
rfcDecoder::encodeRFC2047String (const TQString & _str)
{
  if (_str.isEmpty ())
    return _str;
  const signed char *latin = reinterpret_cast<const signed char *>(_str.latin1()), *l, *start, *stop;
  char hexcode;
  int numQuotes, i;
  int rptr = 0;  // write position in result
  // My stats show this number results in 12 resize() out of 73,000
  int resultLen = 3 * _str.length() / 2;
  TQCString result(resultLen);

  while (*latin)
  {
    // find the first 8-bit character and the start of the word it is in
    l = latin;
    start = latin;
    while (*l)
    {
      if (*l == 32)
        start = l + 1;
      if (*l < 0)
        break;
      l++;
    }
    if (*l)
    {
      // determine how far the encoded word may reach
      numQuotes = 1;
      while (*l)
      {
        /* The encoded word must be limited to 75 character */
        for (i = 0; i < 16; i++)
          if (*l == especials[i])
            numQuotes++;
        if (*l < 0 || *l == '_')
          numQuotes++;
        /* Stop after 58 = 75 - 17 characters or at "<user@host..." */
        if (l - start + 2 * numQuotes >= 58 || *l == 60)
          break;
        l++;
      }
      if (*l)
      {
        // stopped early: prefer to end the encoded word at the last space
        stop = l - 1;
        while (stop >= start && *stop != 32)
          stop--;
        if (stop <= start)
          stop = l;
      }
      else
        stop = l;
      // make room for the literal prefix and the "=?iso-8859-1?q?" header
      if (resultLen - rptr - 1 <= start - latin + 1 + 16 /* =?iso-88... */) {
        resultLen += (start - latin + 1) * 2 + 20; // more space
        result.resize(resultLen);
      }
      while (latin < start)
      {
        result[rptr++] = *latin;
        latin++;
      }
      strcpy(&result[rptr], "=?iso-8859-1?q?"); rptr += 15;
      if (resultLen - rptr - 1 <= 3*(stop - latin + 1)) {
        resultLen += (stop - latin + 1) * 4 + 20; // more space
        result.resize(resultLen);
      }
      while (latin < stop) // can add up to 3 chars/iteration
      {
        numQuotes = 0;
        for (i = 0; i < 16; i++)
          if (*latin == especials[i])
            numQuotes = 1;
        if (*latin < 0 || *latin == '_')
          numQuotes = 1;
        if (numQuotes)
        {
          // "=XX": 48 is '0'; adding 7 skips from ':' to 'A'
          result[rptr++] = '=';
          hexcode = ((*latin & 0xF0) >> 4) + 48;
          if (hexcode >= 58)
            hexcode += 7;
          result[rptr++] = hexcode;
          hexcode = (*latin & 0x0F) + 48;
          if (hexcode >= 58)
            hexcode += 7;
          result[rptr++] = hexcode;
        }
        else
        {
          result[rptr++] = *latin;
        }
        latin++;
      }
      result[rptr++] = '?';
      result[rptr++] = '=';
    }
    else
    {
      // no 8-bit character left: copy the rest literally
      while (*latin)
      {
        if (rptr == resultLen - 1) {
          resultLen += 30;
          result.resize(resultLen);
        }
        result[rptr++] = *latin;
        latin++;
      }
    }
  }
  result[rptr] = 0;
  return result;
}
|
|
|
|
|
|
//-----------------------------------------------------------------------------
|
|
/* Percent-encode _str in the style of RFC 2231 extended parameter values.
 *
 * The string is processed in its latin1() form.  If it contains no 8-bit
 * character it is returned unchanged (as its ascii() form).  Otherwise every
 * 8-bit character, every character listed in `especials`, and the
 * characters '%', '\'' and '*' are written as "%XX" (upper-case hex); all
 * other characters are copied.  '%' has to be escaped because it introduces
 * an escape itself; '\'' and '*' are excluded from RFC 2231 attribute-char,
 * and a literal '\'' would be mistaken for a charset/language delimiter.
 *
 * No "charset'language'" prefix is added here.
 */
const TQString
rfcDecoder::encodeRFC2231String (const TQString & _str)
{
  if (_str.isEmpty ())
    return _str;

  // work directly on the latin1 representation; no scratch copy is needed
  const signed char *latin = reinterpret_cast<const signed char *>(_str.latin1());
  const signed char *l = latin;

  // look for the first 8-bit character (negative as signed char)
  while (*l && *l >= 0)
    l++;
  if (!*l)
    return _str.ascii ();

  TQCString result;
  char hexcode;
  for (l = latin; *l; l++)
  {
    bool quote = (*l < 0 || *l == '%' || *l == '\'' || *l == '*');
    for (int i = 0; i < 16; i++)
      if (*l == especials[i])
        quote = true;
    if (quote)
    {
      // "%XX": 48 is '0'; adding 7 skips from ':' to 'A'
      result += "%";
      hexcode = ((*l & 0xF0) >> 4) + 48;
      if (hexcode >= 58)
        hexcode += 7;
      result += hexcode;
      hexcode = (*l & 0x0F) + 48;
      if (hexcode >= 58)
        hexcode += 7;
      result += hexcode;
    }
    else
    {
      result += *l;
    }
  }
  return result;
}
|
|
|
|
|
|
//-----------------------------------------------------------------------------
|
|
/* Value of the hexadecimal digit c (upper or lower case), or -1 if c is not
   a hexadecimal digit. */
static int rfc2231HexValue (char c)
{
  if (c >= '0' && c <= '9')
    return c - '0';
  if (c >= 'A' && c <= 'F')
    return c - 'A' + 10;
  if (c >= 'a' && c <= 'f')
    return c - 'a' + 10;
  return -1;
}

/* Decode a string of the form  charset'language'percent-encoded-text.
 *
 * If _str does not contain two distinct single quotes it is returned
 * unchanged.  Otherwise the text after the last quote is returned with every
 * "%XX" escape replaced by the character with that code.  A '%' that is not
 * followed by two hexadecimal digits is kept literally.
 *
 * The charset and language parts are only skipped; the decoded characters
 * are NOT converted according to the charset.
 */
const TQString
rfcDecoder::decodeRFC2231String (const TQString & _str)
{
  int p = _str.find ('\'');

  //see if it is an rfc string
  if (p < 0)
    return _str;

  int l = _str.findRev ('\'');

  //second is language
  if (p >= l)
    return _str;

  //everything behind the second quote is the encoded value
  TQString st = _str.mid (l + 1);

  p = 0;
  // an escape needs three characters, so stop two short of the end
  while (p + 2 < (int) st.length ())
  {
    if (st.at (p) == '%')
    {
      const int hi = rfc2231HexValue (st.at (p + 1).latin1 ());
      const int lo = rfc2231HexValue (st.at (p + 2).latin1 ());
      if (hi >= 0 && lo >= 0)
      {
        // replace the '%' by the decoded character and drop the two digits
        st.at (p) = hi * 16 + lo;
        st.remove (p + 1, 2);
      }
    }
    p++;
  }
  return st;
}
|