You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
327 lines
7.0 KiB
327 lines
7.0 KiB
/***************************************************************************
|
|
begin : Sun Feb 29 2004
|
|
copyright : (C) 2004 by Jeroen Wijnhout
|
|
email : Jeroen.Wijnhout@kdemail.net
|
|
***************************************************************************/
|
|
|
|
/***************************************************************************
|
|
* *
|
|
* This program is free software; you can redistribute it and/or modify *
|
|
* it under the terms of the GNU General Public License as published by *
|
|
* the Free Software Foundation; either version 2 of the License, or *
|
|
* (at your option) any later version. *
|
|
* *
|
|
***************************************************************************/
|
|
|
|
#include "convert.h"
|
|
|
|
#include <tqregexp.h>
|
|
#include <tqtextcodec.h>
|
|
#include <tqfile.h>
|
|
|
|
#include <tdemessagebox.h>
|
|
#include <tdeglobal.h>
|
|
#include <kstandarddirs.h>
|
|
#include "kiledebug.h"
|
|
#include <kate/document.h>
|
|
|
|
TQMap<TQString, ConvertMap*> ConvertMap::g_maps;
|
|
|
|
bool ConvertMap::create(const TQString & encoding)
|
|
{
|
|
KILE_DEBUG() << "\tlooking for map for " << encoding << endl;
|
|
ConvertMap * map = g_maps[encoding];
|
|
|
|
if ( map == 0 )
|
|
{
|
|
KILE_DEBUG() << "\tcreating a map for " << encoding << endl;
|
|
map = new ConvertMap(encoding); // FIXME This will never be deleted if load() succeeds...
|
|
if ( map->load() )
|
|
g_maps[encoding] = map;
|
|
else {
|
|
delete map;
|
|
map = 0L;
|
|
}
|
|
|
|
map = g_maps[encoding];
|
|
}
|
|
|
|
return ( map != 0L );
|
|
}
|
|
|
|
// Translates an encoding name into the short "latinN" / "cpNNN" form.
//
// Whitespace is ignored and the comparison is case-insensitive:
//   "ISO 8859-<part>" -> "latin<N>", using the ISO 8859 part -> Latin-N
//                        numbering (parts 1-4 -> latin1-4, 9 -> latin5,
//                        10 -> latin6, 13 -> latin7, 14 -> latin8,
//                        15 -> latin9, 16 -> latin10)
//   "cp <digits>"     -> "cp<digits>"
// Anything else, including ISO 8859 parts that are not Latin alphabets and
// "cp" names without trailing digits, is returned unchanged.
//
// The previous implementation used only the last character of the name
// (so "ISO 8859-15" became "latin5") and always the last four characters
// for code pages (so "cp850" became "cpp850").
TQString ConvertMap::encodingNameFor(const TQString & name)
{
	// normalise: drop all whitespace, then lower-case
	TQString std;
	for ( uint i = 0; i < name.length(); ++i )
		if ( !name[i].isSpace() )
			std += name[i];

	std = std.lower();

	if ( std.startsWith("iso8859-") )
	{
		// everything after the 8 characters of "iso8859-" is the part number
		bool ok = false;
		const uint part = std.mid(8).toUInt(&ok);

		uint latin = 0;
		if ( ok )
		{
			switch ( part )
			{
				case 1: case 2: case 3: case 4: latin = part; break;
				case 9:  latin = 5;  break;
				case 10: latin = 6;  break;
				case 13: latin = 7;  break;
				case 14: latin = 8;  break;
				case 15: latin = 9;  break;
				case 16: latin = 10; break;
				default: latin = 0;  break;
			}
		}

		if ( latin != 0 )
			return "latin" + TQString::number(latin);

		return name;
	}

	if ( std.startsWith("cp") )
	{
		// take the trailing run of digits, whatever its length
		uint digitsFrom = std.length();
		while ( digitsFrom > 0 && std[digitsFrom - 1].isDigit() )
			--digitsFrom;

		if ( digitsFrom < std.length() )
			return "cp" + std.mid(digitsFrom);
	}

	return name;
}
|
|
|
|
// Translates a short "latinN" / "cpNNN" encoding name into the long form.
//
// Whitespace is ignored and the comparison is case-insensitive:
//   "latin<N>"   -> "ISO 8859-<part>", using the Latin-N -> ISO 8859 part
//                   numbering (latin1-4 -> parts 1-4, latin5 -> 9,
//                   latin6 -> 10, latin7 -> 13, latin8 -> 14,
//                   latin9 -> 15, latin10 -> 16)
//   "cp<digits>" -> "cp <digits>"
// Anything else, including unknown Latin numbers and "cp" names without
// trailing digits, is returned unchanged.
//
// The previous implementation appended only the last character of the name
// (so "latin9" became "ISO 8859-9" and "latin10" became "ISO 8859-0") and
// always the last four characters for code pages (so "cp850" became
// "cp p850").
TQString ConvertMap::isoNameFor(const TQString & name)
{
	// normalise: drop all whitespace, then lower-case
	TQString std;
	for ( uint i = 0; i < name.length(); ++i )
		if ( !name[i].isSpace() )
			std += name[i];

	std = std.lower();

	// position of the trailing run of digits (== length() if there is none)
	uint digitsFrom = std.length();
	while ( digitsFrom > 0 && std[digitsFrom - 1].isDigit() )
		--digitsFrom;
	const TQString digits = std.mid(digitsFrom);

	if ( std.startsWith("latin") )
	{
		bool ok = false;
		const uint latin = digits.toUInt(&ok);

		uint part = 0;
		if ( ok )
		{
			switch ( latin )
			{
				case 1: case 2: case 3: case 4: part = latin; break;
				case 5:  part = 9;  break;
				case 6:  part = 10; break;
				case 7:  part = 13; break;
				case 8:  part = 14; break;
				case 9:  part = 15; break;
				case 10: part = 16; break;
				default: part = 0;  break;
			}
		}

		if ( part != 0 )
			return "ISO 8859-" + TQString::number(part);

		return name;
	}

	if ( std.startsWith("cp" ) && !digits.isEmpty() )
		return "cp " + digits;

	return name;
}
|
|
|
|
// Builds an (still empty) map for @p enc and records two aliases for it:
// first the result of encodingNameFor(), then the result of isoNameFor().
ConvertMap::ConvertMap(const TQString & enc )
{
	const TQString shortAlias = encodingNameFor(enc);
	const TQString longAlias = isoNameFor(enc);

	m_aliases.append(shortAlias);
	m_aliases.append(longAlias);
}
|
|
|
|
// Registers a character <-> command pair in both lookup tables.
// The command stored for the character gets a "{}" appended unless
// commandIsTerminated() reports it as terminated; the reverse table is
// keyed by the command exactly as given.
void ConvertMap::addPair(TQChar c, const TQString & enc)
{
	TQString asciiForm = enc;
	if ( !commandIsTerminated(enc) )
		asciiForm += "{}";

	m_toASCII[c] = asciiForm;
	m_toEncoding[enc] = c;
}
|
|
|
|
bool ConvertMap::commandIsTerminated(const TQString & command)
|
|
{
|
|
static TQRegExp reCommandSequences("\\\\([a-zA-Z]+|\\\"|\\')$");
|
|
|
|
return (reCommandSequences.search(command) == -1);
|
|
}
|
|
|
|
bool ConvertMap::load()
|
|
{
|
|
static TQRegExp reMap("^(.*):(.*)");
|
|
|
|
//makeMap(encoding());
|
|
|
|
//if map already exists, replace it
|
|
TQFile qf(TDEGlobal::dirs()->findResource("appdata","encodings/" + encoding() + ".enc"));
|
|
|
|
if ( qf.open(IO_ReadOnly) )
|
|
{
|
|
TQTextStream stream( &qf );
|
|
TQTextCodec *codec = TQTextCodec::codecForName(isoName().ascii());
|
|
if ( codec ) stream.setCodec(codec);
|
|
|
|
while ( !stream.atEnd() )
|
|
{
|
|
//parse the line
|
|
if ( stream.readLine().find(reMap) != -1)
|
|
addPair(reMap.cap(1)[0], reMap.cap(2));
|
|
}
|
|
qf.close();
|
|
|
|
return true;
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
//BEGIN ConvertIO classes
|
|
// Sets up line-by-line access to @p doc: both text buffers start out as
// default-constructed strings and the line counter starts at 0.
ConvertIO::ConvertIO(Kate::Document *doc)
	: m_doc(doc)
	, m_text()
	, m_line()
	, m_nLine(0)
{
}
|
|
|
|
// Gives access to the line most recently fetched by nextLine().
TQString & ConvertIO::currentLine()
{
	return this->m_line;
}
|
|
|
|
void ConvertIO::nextLine()
|
|
{
|
|
m_line = m_doc->textLine(m_nLine++);
|
|
}
|
|
|
|
void ConvertIO::writeText()
|
|
{
|
|
m_doc->setText(m_text);
|
|
}
|
|
|
|
// Returns the line counter, i.e. the index nextLine() will fetch next.
uint ConvertIO::current()
{
	return this->m_nLine;
}
|
|
|
|
bool ConvertIO::done()
|
|
{
|
|
return current() == m_doc->numLines();
|
|
}
|
|
|
|
// Like ConvertIO, but additionally remembers @p url, which writeText()
// uses as the output location.
ConvertIOFile::ConvertIOFile(Kate::Document *doc, const KURL & url)
	: ConvertIO(doc)
	, m_url(url)
{
}
|
|
|
|
void ConvertIOFile::writeText()
|
|
{
|
|
TQFile qf(m_url.path());
|
|
if ( qf.open(IO_WriteOnly) )
|
|
{
|
|
//read the file
|
|
TQTextStream stream( &qf );
|
|
stream << m_text;
|
|
qf.close();
|
|
}
|
|
else
|
|
kdWarning() << "Could not open " << m_url.path() << endl;
|
|
}
|
|
|
|
// Stores the I/O helper and the encoding name; the conversion map pointer
// stays null until setMap() assigns it.
ConvertBase::ConvertBase(const TQString & encoding, ConvertIO * io)
	: m_io(io)
	, m_encoding(encoding)
	, m_map(0L)
{
}
|
|
|
|
//END ConvertIO classes
|
|
|
|
//BEGIN ConvertBase
|
|
// Identity mapping: returns the character at position @p i of the current
// line as a string and advances @p i by one.
TQString ConvertBase::mapNext(uint &i)
{
	const TQChar ch = m_io->currentLine()[i];
	++i;
	return TQString(ch);
}
|
|
|
|
bool ConvertBase::convert()
|
|
{
|
|
if ( ! setMap() ) return false;
|
|
|
|
m_io->text() = TQString();
|
|
do
|
|
{
|
|
m_io->nextLine();
|
|
uint i = 0;
|
|
while ( i < m_io->currentLine().length() )
|
|
{
|
|
m_io->text() += mapNext(i);
|
|
}
|
|
if ( ! m_io->done() ) m_io->text() += '\n';
|
|
}
|
|
while ( ! m_io->done() );
|
|
|
|
m_io->writeText();
|
|
return true;
|
|
}
|
|
|
|
bool ConvertBase::setMap()
|
|
{
|
|
//create map (or use existing)
|
|
if (ConvertMap::create(m_encoding))
|
|
m_map = ConvertMap::mapFor(m_encoding);
|
|
else
|
|
m_map = 0L;
|
|
|
|
return ( m_map != 0L );
|
|
}
|
|
//END ConvertBase
|
|
|
|
//BEGIN ConvertEncToASCII
|
|
// Consumes the character at position @p i of the current line (advancing
// @p i by one) and returns its replacement from the map when the map can
// decode it, or the character itself otherwise.
TQString ConvertEncToASCII::mapNext(uint &i)
{
	const TQChar ch = m_io->currentLine()[i++];

	if ( m_map->canDecode(ch) )
		return m_map->toASCII(ch);

	return TQString(ch);
}
|
|
//END ConvertEncToASCII
|
|
|
|
//BEGIN ConvertASCIIToEnc
|
|
|
|
//i is the position of the '\'
|
|
// Extracts one command sequence from the current line.
//
// On entry @p i must be the position of the '\' that starts the sequence;
// on return it points just past the sequence. The result is
//   - the backslash plus the following run of letters, or
//   - the backslash plus the single following non-letter character, or
//   - the backslash alone when it is the last character of the line.
//
// All reads are bounds-checked. The previous code indexed past the end of
// the line for a trailing backslash and appended whatever that read
// returned to the sequence.
TQString ConvertASCIIToEnc::nextSequence(uint &i)
{
	const TQString & line = m_io->currentLine();

	// the backslash itself
	TQString seq = TQString(line[i++]);

	// nothing follows the backslash
	if ( i >= line.length() )
		return seq;

	// control symbol: backslash followed by a single non-letter
	if ( !line[i].isLetter() )
		return seq + TQString(line[i++]);

	// control word: backslash followed by letters
	while ( i < line.length() && line[i].isLetter() )
		seq += line[i++];

	return seq;
}
|
|
|
|
bool ConvertASCIIToEnc::isModifier(const TQString & seq)
|
|
{
|
|
static TQRegExp reModifier("\\\\([cHkruv]|\"|\'|\\^|`|~|=|\\.)");
|
|
return reModifier.exactMatch(seq);
|
|
}
|
|
|
|
// Reads the command sequence starting at position @p i of the current line
// (the position of a '\') and advances @p i past everything consumed.
//
// If the sequence is a modifier (see isModifier()), its argument is
// pulled in as well:
//   - a letter modifier gets a single space appended,
//   - whitespace and one empty "{}" after the modifier are skipped,
//   - the argument is then either another command sequence, a braced
//     group holding at most one letter, or the single next non-space
//     character.
// If the sequence is not a modifier but the map can encode it, one
// trailing "{}" or one trailing whitespace character is consumed.
//
// All reads of the current line are bounds-checked. The previous code read
// past the end of the line when a modifier was its last token and appended
// whatever that read returned to the sequence.
TQString ConvertASCIIToEnc::getSequence(uint &i)
{
	static TQRegExp reBraces("\\{([a-zA-Z]?)\\}");

	TQString seq = nextSequence(i);
	const TQString & line = m_io->currentLine();

	if ( isModifier(seq) )
	{
		KILE_DEBUG() << "\tisModifier true : " << seq << endl;
		if ( seq[seq.length() - 1].isLetter() ) seq += ' ';

		// skip whitespace between the modifier and its argument
		while ( i < line.length() && line[i].isSpace() ) i++;

		// skip an empty group directly after the modifier
		if ( line.mid(i,2) == "{}" ) i = i + 2;

		if ( i < line.length() && line[i] == '\\' )
		{
			// the argument is itself a command sequence
			seq += nextSequence(i);
		}
		else if ( reBraces.exactMatch(line.mid(i,3)) )
		{
			// the argument is a braced group with at most one letter
			KILE_DEBUG() << "\tbraces detected" << endl;
			i = i + 3;
			seq += reBraces.cap(1);
		}
		else if ( i < line.length() )
		{
			// the argument is the next character; a space is consumed
			// but not added to the sequence
			const TQChar nextChar = line[i++];
			if ( !nextChar.isSpace() ) seq += TQString(nextChar);
		}
		// else: the modifier is the last token on the line, no argument
	}
	else if ( m_map->canEncode(seq) )
	{
		// swallow the terminator of an encodable command
		if ( line.mid(i,2) == "{}" ) i = i + 2;
		else if ( i < line.length() && line[i].isSpace() ) ++i;
	}

	return seq;
}
|
|
|
|
// Converts the next piece of the current line, starting at position @p i.
// A piece starting with '\' is read as a command sequence and replaced by
// the map's encoding when the map can encode it; otherwise the sequence is
// returned unchanged. Any other character is handled by
// ConvertBase::mapNext().
TQString ConvertASCIIToEnc::mapNext(uint &i)
{
	// plain character: defer to the base class
	if ( !( m_io->currentLine()[i] == '\\' ) )
		return ConvertBase::mapNext(i);

	const TQString seq = getSequence(i);
	KILE_DEBUG() << "'\tsequence: " << seq << endl;

	if ( !m_map->canEncode(seq) )
		return seq;

	KILE_DEBUG() << "\tcan encode this" << endl;
	return m_map->toEncoding(seq);
}
|
|
//END ConvertASCIIToEnc
|