/*
  This program is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation; either version 2 of the License, or
  (at your option) any later version.
*/

/*
  The Original Code is mozilla.org code.
  See http://lxr.mozilla.org/mozilla/source/modules/rdf/src/utils.c#540

  Copyright (C) 1998 Netscape Communications Corporation
  Copyright (C) 2005 Ismail Donmez <ismail@kde.org>
*/

/* Masks selecting the N most significant bits of a byte. */
#define kLeft1BitMask 0x80 /* 1000 0000 */
#define kLeft2BitsMask 0xC0 /* 1100 0000 */
#define kLeft3BitsMask 0xE0 /* 1110 0000 */
#define kLeft4BitsMask 0xF0 /* 1111 0000 */
#define kLeft5BitsMask 0xF8 /* 1111 1000 */
#define kLeft6BitsMask 0xFC /* 1111 1100 */
#define kLeft7BitsMask 0xFE /* 1111 1110 */

/* Bit pattern found in the lead byte of an N-byte sequence: N one-bits.
   The classifier macros below additionally require the following bit to be 0. */
#define k2BytesLeadByte kLeft2BitsMask
#define k3BytesLeadByte kLeft3BitsMask
#define k4BytesLeadByte kLeft4BitsMask
#define k5BytesLeadByte kLeft5BitsMask
#define k6BytesLeadByte kLeft6BitsMask

/* Bit pattern of a trail (continuation) byte, 10xx xxxx.
   "Trial" is a misspelling of "trail"; the name is kept because it is in use. */
#define kTrialByte kLeft1BitMask

/* Lead-byte classifiers: true when c announces a sequence of the given length.
   Each macro expands its argument exactly once. Only the low eight bits of c
   take part in the test, so a sign-extended (negative) char is handled too. */
#define UTF8_1Byte(c) ( 0 == ((c) & kLeft1BitMask))
#define UTF8_2Bytes(c) ( k2BytesLeadByte == ((c) & kLeft3BitsMask))
#define UTF8_3Bytes(c) ( k3BytesLeadByte == ((c) & kLeft4BitsMask))
#define UTF8_4Bytes(c) ( k4BytesLeadByte == ((c) & kLeft5BitsMask))
#define UTF8_5Bytes(c) ( k5BytesLeadByte == ((c) & kLeft6BitsMask))
#define UTF8_6Bytes(c) ( k6BytesLeadByte == ((c) & kLeft7BitsMask))

/* True when c has the trail-byte form 10xx xxxx. */
#define UTF8_ValidTrialByte(c) ( kTrialByte == ((c) & kLeft2BitsMask))

bool isUtf8(const TQCString& text)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
int j;
|
|
|
|
int clen = 0;
|
|
|
|
int len = text.length();
|
|
|
|
|
|
|
|
JapaneseCode* jc = new JapaneseCode();
|
|
|
|
|
|
|
|
JapaneseCode::Type result = jc->guess_jp(text, len);
|
|
|
|
|
|
|
|
switch(result)
|
|
|
|
{
|
|
|
|
case JapaneseCode::SJIS:
|
|
|
|
case JapaneseCode::JIS:
|
|
|
|
delete jc;
|
|
|
|
return false;
|
|
|
|
default:
|
|
|
|
delete jc;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
for(i=0; i < len; i += clen)
|
|
|
|
{
|
|
|
|
if(UTF8_1Byte(text[i]))
|
|
|
|
{
|
|
|
|
clen = 1;
|
|
|
|
}
|
|
|
|
else if(UTF8_2Bytes(text[i]))
|
|
|
|
{
|
|
|
|
clen = 2;
|
|
|
|
|
|
|
|
/* No enough trail bytes */
|
|
|
|
if( (i + clen) > len)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
/* 0000 0000 - 0000 007F : should encode in less bytes */
|
|
|
|
if(0 == (text[i] & 0x1E ))
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
else if(UTF8_3Bytes(text[i]))
|
|
|
|
{
|
|
|
|
clen = 3;
|
|
|
|
|
|
|
|
/* No enough trail bytes */
|
|
|
|
if( (i + clen) > len)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
/* a single Surrogate should not show in 3 bytes UTF8, instead, the pair should be intepreted
|
|
|
|
as one single UCS4 char and encoded UTF8 in 4 bytes */
|
|
|
|
if((TQChar(0xED) == text[i] ) && (0xA0 == (text[i+1] & 0xA0 ) ))
|
|
|
|
return false;
|
|
|
|
|
|
|
|
/* 0000 0000 - 0000 07FF : should encode in less bytes */
|
|
|
|
if((0 == (text[i] & 0x0F )) && (0 == (text[i+1] & 0x20 ) ))
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
else if(UTF8_4Bytes(text[i]))
|
|
|
|
{
|
|
|
|
clen = 4;
|
|
|
|
|
|
|
|
/* No enough trail bytes */
|
|
|
|
if( (i + clen) > len)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
/* 0000 0000 - 0000 FFFF : should encode in less bytes */
|
|
|
|
if((0 == (text[i] & 0x07 )) && (0 == (text[i+1] & 0x30 )) )
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
else if(UTF8_5Bytes(text[i]))
|
|
|
|
{
|
|
|
|
clen = 5;
|
|
|
|
|
|
|
|
/* No enough trail bytes */
|
|
|
|
if( (i + clen) > len)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
/* 0000 0000 - 001F FFFF : should encode in less bytes */
|
|
|
|
if((0 == (text[i] & 0x03 )) && (0 == (text[i+1] & 0x38 )) )
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
else if(UTF8_6Bytes(text[i]))
|
|
|
|
{
|
|
|
|
clen = 6;
|
|
|
|
|
|
|
|
/* No enough trail bytes */
|
|
|
|
if( (i + clen) > len)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
/* 0000 0000 - 03FF FFFF : should encode in less bytes */
|
|
|
|
if((0 == (text[i] & 0x01 )) && (0 == (text[i+1] & 0x3E )) )
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
for(j = 1; j<clen ;++j)
|
|
|
|
{
|
|
|
|
if(! UTF8_ValidTrialByte(text[i+j])) /* Trail bytes invalid */
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return true;
|
|
|
|
}
|