You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
516 lines
13 KiB
516 lines
13 KiB
/*
|
|
|
|
Copyright (C) 2010 Timothy Pearson <kb9vqf@pearsoncomputing.net>
|
|
|
|
This library is free software; you can redistribute it and/or
|
|
modify it under the terms of the GNU General Public
|
|
License as published by the Free Software Foundation; either
|
|
version 2 of the License, or (at your option) any later version.
|
|
|
|
This library is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
Library General Public License for more details.
|
|
|
|
You should have received a copy of the GNU Library General Public License
|
|
along with this library; see the file COPYING.LIB. If not, write to
|
|
the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
|
|
Boston, MA 02110-1301, USA.
|
|
|
|
*/
|
|
|
|
#include <tqt.h>
|
|
#include <ntqtextcodec.h>
|
|
|
|
#ifdef USE_QT4
|
|
|
|
// returns a string containing the letters and numbers from input,
|
|
// with a space separating run of a character class. e.g. "iso8859-1"
|
|
// becomes "iso 8859 1"
|
|
static QString lettersAndNumbers( const char * input )
|
|
{
|
|
QString result;
|
|
QChar c;
|
|
|
|
while( input && *input ) {
|
|
c = *input;
|
|
if ( c.isLetter() || c.isNumber() )
|
|
result += c.lower();
|
|
if ( input[1] ) {
|
|
// add space at character class transition, except
|
|
// transition from upper-case to lower-case letter
|
|
QChar n( input[1] );
|
|
if ( c.isLetter() && n.isLetter() ) {
|
|
if ( c == c.lower() && n == n.upper() )
|
|
result += ' ';
|
|
} else if ( c.category() != n.category() ) {
|
|
result += ' ';
|
|
}
|
|
}
|
|
input++;
|
|
}
|
|
return result.simplifyWhiteSpace();
|
|
}
|
|
|
|
// Marker value written into QMultiByteUnicodeTable::unicode while parsing a
// charmap when that entry is only a prefix byte of a longer multi-byte
// sequence; the remaining bytes are resolved through the entry's multiByte
// sub-table.
#define CHAINED 0xffff
|
|
|
|
// One slot of a byte-indexed decoding table.
//
// If multiByte is non-null, ignore unicode and index into multiByte (an
// array of 256 slots) with the next input byte. Otherwise unicode holds the
// decoded character; it defaults to 0xfffd for slots that were never mapped.
//
// A slot owns its multiByte array and frees it with delete[] on destruction,
// so the sub-table must have been allocated with new[]. Because of that
// ownership, copying a slot would lead to a double delete; the copy
// operations are therefore declared private and left undefined.
struct QMultiByteUnicodeTable {
    QMultiByteUnicodeTable() : unicode(0xfffd), multiByte(0) { }

    ~QMultiByteUnicodeTable()
    {
        // delete[] on a null pointer is a no-op, so no check is needed.
        delete [] multiByte;
    }

    ushort unicode;
    QMultiByteUnicodeTable* multiByte;

private:
    // Not copyable: multiByte is an owning pointer.
    QMultiByteUnicodeTable( const QMultiByteUnicodeTable& );
    QMultiByteUnicodeTable& operator=( const QMultiByteUnicodeTable& );
};
|
|
|
|
// Parses one escaped byte value from a charmap line.
//
// On entry cursor must point at the escape character. The character after
// it selects the radix:
//     <esc>x<hex digits>     hexadecimal
//     <esc>d<decimal digits> decimal
//     <esc><octal digits>    octal
// On return cursor has been advanced past the characters consumed by
// strtol(). If cursor points at the terminating NUL nothing is consumed.
//
// The octal form has no radix letter, so its digits start directly after
// the escape character; parsing from cursor+1 (rather than cursor+2) keeps
// the first octal digit and also avoids stepping over the terminating NUL
// when the escape character is the last character of the line.
//
// Returns the parsed value masked to 0..255, or 0 if nothing was parsed.
static int getByte(char* &cursor)
{
    int byte = 0;
    if ( *cursor ) {
        if ( cursor[1] == 'x' )
            byte = strtol(cursor+2,&cursor,16);
        else if ( cursor[1] == 'd' )
            byte = strtol(cursor+2,&cursor,10);
        else
            byte = strtol(cursor+1,&cursor,8);
    }
    return byte&0xff;
}
|
|
|
|
class QTextCodecFromIOD;
|
|
|
|
class QTextCodecFromIODDecoder : public QTextDecoder {
|
|
const QTextCodecFromIOD* codec;
|
|
QMultiByteUnicodeTable* mb;
|
|
public:
|
|
QTextCodecFromIODDecoder(const QTextCodecFromIOD* c);
|
|
//QString toUnicode(const char* chars, int len);
|
|
QString convertToUnicode(const char* chars, int len, int *state);
|
|
};
|
|
|
|
class QTextCodecFromIOD : public QTextCodec {
|
|
friend class QTextCodecFromIODDecoder;
|
|
|
|
TQCString n;
|
|
|
|
// If from_unicode_page[row()][cell()] is 0 and from_unicode_page_multiByte,
|
|
// use from_unicode_page_multiByte[row()][cell()] as string.
|
|
char** from_unicode_page;
|
|
char*** from_unicode_page_multiByte;
|
|
char unkn;
|
|
|
|
// Only one of these is used
|
|
ushort* to_unicode;
|
|
QMultiByteUnicodeTable* to_unicode_multiByte;
|
|
int max_bytes_per_char;
|
|
TQStrList aliases;
|
|
|
|
bool stateless() const { return !to_unicode_multiByte; }
|
|
|
|
public:
|
|
QTextCodecFromIOD(QIODevice* iod)
|
|
{
|
|
from_unicode_page = 0;
|
|
to_unicode_multiByte = 0;
|
|
to_unicode = 0;
|
|
from_unicode_page_multiByte = 0;
|
|
max_bytes_per_char = 1;
|
|
|
|
const int maxlen=100;
|
|
char line[maxlen];
|
|
char esc='\\';
|
|
char comm='%';
|
|
bool incmap = FALSE;
|
|
while (iod->readLine(line,maxlen) > 0) {
|
|
if (0==qstrnicmp(line,"<code_set_name>",15))
|
|
n = line+15;
|
|
else if (0==qstrnicmp(line,"<escape_char> ",14))
|
|
esc = line[14];
|
|
else if (0==qstrnicmp(line,"<comment_char> ",15))
|
|
comm = line[15];
|
|
else if (line[0]==comm && 0==qstrnicmp(line+1," alias ",7)) {
|
|
aliases.append(line+8);
|
|
} else if (0==qstrnicmp(line,"CHARMAP",7)) {
|
|
if (!from_unicode_page) {
|
|
from_unicode_page = new char*[256];
|
|
for (int i=0; i<256; i++)
|
|
from_unicode_page[i]=0;
|
|
}
|
|
if (!to_unicode) {
|
|
to_unicode = new ushort[256];
|
|
}
|
|
incmap = TRUE;
|
|
} else if (0==qstrnicmp(line,"END CHARMAP",11))
|
|
break;
|
|
else if (incmap) {
|
|
char* cursor = line;
|
|
int byte=-1,unicode=-1;
|
|
ushort* mb_unicode=0;
|
|
const int maxmb=8; // more -> we'll need to improve datastructures
|
|
char mb[maxmb+1];
|
|
int nmb=0;
|
|
|
|
while (*cursor) {
|
|
if (cursor[0]=='<' && cursor[1]=='U' &&
|
|
cursor[2]>='0' && cursor[2]<='9' &&
|
|
cursor[3]>='0' && cursor[3]<='9') {
|
|
|
|
unicode = strtol(cursor+2,&cursor,16);
|
|
|
|
} else if (*cursor==esc) {
|
|
|
|
byte = getByte(cursor);
|
|
|
|
if ( *cursor == esc ) {
|
|
if ( !to_unicode_multiByte ) {
|
|
to_unicode_multiByte =
|
|
new QMultiByteUnicodeTable[256];
|
|
for (int i=0; i<256; i++) {
|
|
to_unicode_multiByte[i].unicode =
|
|
to_unicode[i];
|
|
to_unicode_multiByte[i].multiByte = 0;
|
|
}
|
|
delete [] to_unicode;
|
|
to_unicode = 0;
|
|
}
|
|
QMultiByteUnicodeTable* mbut =
|
|
to_unicode_multiByte+byte;
|
|
mb[nmb++] = byte;
|
|
while ( nmb < maxmb && *cursor == esc ) {
|
|
// Always at least once
|
|
|
|
mbut->unicode = CHAINED;
|
|
byte = getByte(cursor);
|
|
mb[nmb++] = byte;
|
|
if (!mbut->multiByte) {
|
|
mbut->multiByte =
|
|
new QMultiByteUnicodeTable[256];
|
|
}
|
|
mbut = mbut->multiByte+byte;
|
|
mb_unicode = & mbut->unicode;
|
|
}
|
|
|
|
if ( nmb > max_bytes_per_char )
|
|
max_bytes_per_char = nmb;
|
|
}
|
|
} else {
|
|
cursor++;
|
|
}
|
|
}
|
|
|
|
if (unicode >= 0 && unicode <= 0xffff)
|
|
{
|
|
QChar ch((ushort)unicode);
|
|
if (!from_unicode_page[ch.row()]) {
|
|
from_unicode_page[ch.row()] = new char[256];
|
|
for (int i=0; i<256; i++)
|
|
from_unicode_page[ch.row()][i]=0;
|
|
}
|
|
if ( mb_unicode ) {
|
|
from_unicode_page[ch.row()][ch.cell()] = 0;
|
|
if (!from_unicode_page_multiByte) {
|
|
from_unicode_page_multiByte = new char**[256];
|
|
for (int i=0; i<256; i++)
|
|
from_unicode_page_multiByte[i]=0;
|
|
}
|
|
if (!from_unicode_page_multiByte[ch.row()]) {
|
|
from_unicode_page_multiByte[ch.row()] = new char*[256];
|
|
for (int i=0; i<256; i++)
|
|
from_unicode_page_multiByte[ch.row()][i] = 0;
|
|
}
|
|
mb[nmb++] = 0;
|
|
from_unicode_page_multiByte[ch.row()][ch.cell()]
|
|
= qstrdup(mb);
|
|
*mb_unicode = unicode;
|
|
} else {
|
|
from_unicode_page[ch.row()][ch.cell()] = (char)byte;
|
|
if ( to_unicode )
|
|
to_unicode[byte] = unicode;
|
|
else
|
|
to_unicode_multiByte[byte].unicode = unicode;
|
|
}
|
|
} else {
|
|
}
|
|
}
|
|
}
|
|
n = n.stripWhiteSpace();
|
|
|
|
unkn = '?'; // ##### Might be a bad choice.
|
|
}
|
|
|
|
~QTextCodecFromIOD()
|
|
{
|
|
if ( from_unicode_page ) {
|
|
for (int i=0; i<256; i++)
|
|
if (from_unicode_page[i])
|
|
delete [] from_unicode_page[i];
|
|
}
|
|
if ( from_unicode_page_multiByte ) {
|
|
for (int i=0; i<256; i++)
|
|
if (from_unicode_page_multiByte[i])
|
|
for (int j=0; j<256; j++)
|
|
if (from_unicode_page_multiByte[i][j])
|
|
delete [] from_unicode_page_multiByte[i][j];
|
|
}
|
|
if ( to_unicode )
|
|
delete [] to_unicode;
|
|
if ( to_unicode_multiByte )
|
|
delete [] to_unicode_multiByte;
|
|
}
|
|
|
|
bool ok() const
|
|
{
|
|
return !!from_unicode_page;
|
|
}
|
|
|
|
QTextDecoder* makeDecoder() const
|
|
{
|
|
if ( stateless() )
|
|
return QTextCodec::makeDecoder();
|
|
else
|
|
return new QTextCodecFromIODDecoder(this);
|
|
}
|
|
|
|
const char* qtio_name() const
|
|
{
|
|
return n;
|
|
}
|
|
|
|
int mibEnum() const
|
|
{
|
|
return 0; // #### Unknown.
|
|
}
|
|
|
|
int heuristicContentMatch(const char*, int) const
|
|
{
|
|
return 0;
|
|
}
|
|
|
|
int heuristicNameMatch(const char* hint) const
|
|
{
|
|
int bestr = QTextCodec::heuristicNameMatch(hint);
|
|
TQStrListIterator it(aliases);
|
|
char* a;
|
|
while ((a=it.current())) {
|
|
++it;
|
|
int r = simpleHeuristicNameMatch(a,hint);
|
|
if (r > bestr)
|
|
bestr = r;
|
|
}
|
|
return bestr;
|
|
}
|
|
|
|
QString toUnicode(const char* chars, int len) const
|
|
{
|
|
const uchar* uchars = (const uchar*)chars;
|
|
QString result;
|
|
QMultiByteUnicodeTable* multiByte=to_unicode_multiByte;
|
|
if ( multiByte ) {
|
|
while (len--) {
|
|
QMultiByteUnicodeTable& mb = multiByte[*uchars];
|
|
if ( mb.multiByte ) {
|
|
// Chained multi-byte
|
|
multiByte = mb.multiByte;
|
|
} else {
|
|
result += QChar(mb.unicode);
|
|
multiByte=to_unicode_multiByte;
|
|
}
|
|
uchars++;
|
|
}
|
|
} else {
|
|
while (len--)
|
|
result += QChar(to_unicode[*uchars++]);
|
|
}
|
|
return result;
|
|
}
|
|
|
|
QString convertToUnicode(const char* chars, int len, ConverterState *state) const
|
|
{
|
|
return toUnicode(chars, len);
|
|
}
|
|
|
|
#if !defined(Q_NO_USING_KEYWORD)
|
|
using QTextCodec::fromUnicode;
|
|
#endif
|
|
TQCString fromUnicode(const QString& uc, int& lenInOut) const
|
|
{
|
|
if (lenInOut > (int)uc.length())
|
|
lenInOut = uc.length();
|
|
int rlen = lenInOut*max_bytes_per_char;
|
|
TQCString rstr(rlen+1);
|
|
char* cursor = rstr.data();
|
|
char* s=0;
|
|
int l = lenInOut;
|
|
int lout = 0;
|
|
for (int i=0; i<l; i++) {
|
|
QChar ch = uc[i];
|
|
if ( ch == QChar() ) {
|
|
// special
|
|
*cursor++ = 0;
|
|
} else if ( from_unicode_page[ch.row()] &&
|
|
from_unicode_page[ch.row()][ch.cell()] )
|
|
{
|
|
*cursor++ = from_unicode_page[ch.row()][ch.cell()];
|
|
lout++;
|
|
} else if ( from_unicode_page_multiByte &&
|
|
from_unicode_page_multiByte[ch.row()] &&
|
|
(s=from_unicode_page_multiByte[ch.row()][ch.cell()]) )
|
|
{
|
|
while (*s) {
|
|
*cursor++ = *s++;
|
|
lout++;
|
|
}
|
|
} else {
|
|
*cursor++ = unkn;
|
|
lout++;
|
|
}
|
|
}
|
|
*cursor = 0;
|
|
lenInOut = lout;
|
|
return rstr;
|
|
}
|
|
|
|
QByteArray convertFromUnicode(const QChar *charin, int len, ConverterState *state) const
|
|
{
|
|
return fromUnicode(charin, len);
|
|
}
|
|
|
|
QByteArray name() const
|
|
{
|
|
return qtio_name();
|
|
}
|
|
};
|
|
|
|
// QTextCodecFromIODDecoder::QTextCodecFromIODDecoder(const QTextCodecFromIOD* c) :
|
|
// codec(c)
|
|
// {
|
|
// mb = codec->to_unicode_multiByte;
|
|
// }
|
|
|
|
// Decodes len bytes from chars, walking the chained multi-byte tables.
// The current table is kept in the member mb, so a sequence that is split
// across two calls continues where the previous call stopped. A completed
// entry whose unicode value is 0 produces no output. state is not used.
QString QTextCodecFromIODDecoder::convertToUnicode(const char* chars, int len, int *state)
{
    QString result;
    const uchar* p = (const uchar*)chars;
    for ( ; len--; ++p ) {
        QMultiByteUnicodeTable& entry = mb[*p];
        if ( entry.multiByte ) {
            // Chained multi-byte: descend and wait for the next byte
            mb = entry.multiByte;
            continue;
        }
        if ( entry.unicode )
            result += QChar(entry.unicode);
        // Sequence finished: restart from the codec's root table
        mb = codec->to_unicode_multiByte;
    }
    return result;
}
|
|
|
|
#ifndef TQT_NO_CODECS
|
|
// Cannot use <pre> or \code
|
|
/*!
|
|
Reads a POSIX2 charmap definition from \a iod.
|
|
The parser recognizes the following lines:
|
|
|
|
<font name="sans">
|
|
<code_set_name> <i>name</i></br>
|
|
<escape_char> <i>character</i></br>
|
|
% alias <i>alias</i></br>
|
|
CHARMAP</br>
|
|
<<i>token</i>> /x<i>hexbyte</i> <U<i>unicode</i>> ...</br>
|
|
<<i>token</i>> /d<i>decbyte</i> <U<i>unicode</i>> ...</br>
|
|
<<i>token</i>> /<i>octbyte</i> <U<i>unicode</i>> ...</br>
|
|
<<i>token</i>> /<i>any</i>/<i>any</i>... <U<i>unicode</i>> ...</br>
|
|
END CHARMAP</br>
|
|
</font>
|
|
|
|
The resulting QTextCodec is returned (and also added to the global
|
|
list of codecs). The name() of the result is taken from the
|
|
code_set_name.
|
|
|
|
Note that a codec constructed in this way uses much more memory
|
|
and is slower than a hand-written QTextCodec subclass, since
|
|
tables in code are kept in memory shared by all Qt applications.
|
|
|
|
\sa loadCharmapFile()
|
|
*/
|
|
QTextCodec* QTextCodec::loadCharmap(QIODevice* iod)
|
|
{
|
|
QTextCodecFromIOD* r = new QTextCodecFromIOD(iod);
|
|
if ( !r->ok() ) {
|
|
delete r;
|
|
r = 0;
|
|
}
|
|
return r;
|
|
}
|
|
|
|
/*!
|
|
A convenience function for loadCharmap() that loads the charmap
|
|
definition from the file \a filename.
|
|
*/
|
|
QTextCodec* QTextCodec::loadCharmapFile(QString filename)
|
|
{
|
|
QFile f(filename);
|
|
if (f.open(IO_ReadOnly)) {
|
|
QTextCodecFromIOD* r = new QTextCodecFromIOD(&f);
|
|
if ( !r->ok() )
|
|
delete r;
|
|
else
|
|
return r;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
/*!
|
|
Returns a value indicating how likely it is that this decoder is
|
|
appropriate for decoding some format that has the given name. The
|
|
name is compared with the \a hint.
|
|
|
|
A good match returns a positive number around the length of the
|
|
string. A bad match is negative.
|
|
|
|
The default implementation calls simpleHeuristicNameMatch() with
|
|
the name of the codec.
|
|
*/
|
|
int QTextCodec::heuristicNameMatch(const char* hint) const
|
|
{
|
|
return simpleHeuristicNameMatch(name(),hint);
|
|
}
|
|
|
|
/*!
|
|
A simple utility function for heuristicNameMatch(): it does some
|
|
very minor character-skipping so that almost-exact matches score
|
|
high. \a name is the text we're matching and \a hint is used for
|
|
the comparison.
|
|
*/
|
|
int QTextCodec::simpleHeuristicNameMatch(const char* name, const char* hint)
|
|
{
|
|
// if they're the same, return a perfect score.
|
|
if ( name && hint && *name && *hint && qstricmp( name, hint ) == 0 )
|
|
return qstrlen( hint );
|
|
|
|
// if the letters and numbers are the same, we have an "almost"
|
|
// perfect match.
|
|
QString h( lettersAndNumbers( hint ) );
|
|
QString n( lettersAndNumbers( name ) );
|
|
if ( h == n )
|
|
return qstrlen( hint )-1;
|
|
|
|
if ( h.stripWhiteSpace() == n.stripWhiteSpace() )
|
|
return qstrlen( hint )-2;
|
|
|
|
// could do some more here, but I don't think it's worth it
|
|
|
|
return 0;
|
|
}
|
|
|
|
#endif //TQT_NO_CODECS
|
|
|
|
#endif // USE_QT4
|