/***************************************************************************
    begin                : Sun Feb 29 2004
    copyright            : (C) 2004 by Jeroen Wijnhout
    email                : Jeroen.Wijnhout@kdemail.net
 ***************************************************************************/

/***************************************************************************
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU General Public License as published by  *
 *   the Free Software Foundation; either version 2 of the License, or     *
 *   (at your option) any later version.                                   *
 *                                                                         *
 ***************************************************************************/

#include "convert.h"

// NOTE(review): in the reviewed copy of this file the targets of the seven
// angle-bracket includes below were missing. They have been restored from the
// symbols this file uses (TQFile, TQTextStream, TQTextCodec, TQRegExp,
// TDEGlobal, dirs()->findResource, Kate::Document) -- confirm against the build.
#include <tqfile.h>
#include <tqtextstream.h>
#include <tqtextcodec.h>
#include <tqregexp.h>

#include <tdeglobal.h>
#include <kstandarddirs.h>
#include "kiledebug.h"
#include <kate/document.h>

// Cache of the maps loaded so far, keyed by the encoding name given to create().
// NOTE(review): the template arguments were missing in the reviewed copy; they
// are inferred from g_maps[encoding] being assigned from/to a ConvertMap* below.
TQMap<TQString, ConvertMap*> ConvertMap::g_maps;

/**
 * Returns @p name with every whitespace character removed and the rest
 * lower-cased. Shared by encodingNameFor() and isoNameFor().
 */
static TQString compactLowerName(const TQString & name)
{
	TQString compact;
	for ( uint i = 0; i < name.length(); ++i )
	{
		if ( !name[i].isSpace() )
			compact += name[i];
	}
	return compact.lower();
}

/**
 * Makes sure a map for @p encoding is present in g_maps, loading it on first use.
 *
 * @return true if a map for @p encoding is available afterwards, false if it
 *         could not be loaded.
 */
bool ConvertMap::create(const TQString & encoding)
{
	KILE_DEBUG() << "\tlooking for map for " << encoding << endl;

	// Ask contains() first: operator[] on an unknown key would insert a null
	// placeholder into the cache.
	ConvertMap * map = 0L;
	if ( g_maps.contains(encoding) )
		map = g_maps[encoding];

	if ( map == 0L )
	{
		KILE_DEBUG() << "\tcreating a map for " << encoding << endl;
		map = new ConvertMap(encoding);

		// FIXME a successfully loaded map stays in g_maps and is never deleted.
		if ( map->load() )
			g_maps[encoding] = map;
		else
		{
			delete map;
			map = 0L;
		}
	}

	return ( map != 0L );
}

/**
 * Translates an "iso8859-N" / "cp NNNN" style name into the "latinN" / "cpNNNN"
 * form; any other name is returned unchanged.
 *
 * NOTE(review): only the last character (iso8859-) resp. the last four
 * characters (cp) of the compacted name are kept, so e.g. "iso8859-15" yields
 * "latin5". Behaviour is left as it was; confirm whether that is intended.
 */
TQString ConvertMap::encodingNameFor(const TQString & name)
{
	const TQString compact = compactLowerName(name);

	if ( compact.startsWith("iso8859-") )
		return "latin" + compact.right(1);

	if ( compact.startsWith("cp") )
		return "cp" + compact.right(4);

	return name;
}

/**
 * Translates a "latinN" / "cpNNNN" style name into the "ISO 8859-N" / "cp NNNN"
 * form; any other name is returned unchanged.
 *
 * NOTE(review): as in encodingNameFor(), only the last one resp. four
 * characters of the compacted name are kept.
 */
TQString ConvertMap::isoNameFor(const TQString & name)
{
	const TQString compact = compactLowerName(name);

	if ( compact.startsWith("latin") )
		return "ISO 8859-" + compact.right(1);

	if ( compact.startsWith("cp") )
		return "cp " + compact.right(4);

	return name;
}

/**
 * Records both spellings of @p enc (encodingNameFor() first, isoNameFor()
 * second) in the alias list.
 */
ConvertMap::ConvertMap(const TQString & enc)
{
	m_aliases.append(encodingNameFor(enc));
	m_aliases.append(isoNameFor(enc));
}

/**
 * Registers the pair character @p c <-> command @p enc in both directions.
 * In the character-to-command direction "{}" is appended to the command unless
 * commandIsTerminated() reports that it is not needed.
 */
void ConvertMap::addPair(TQChar c, const TQString & enc)
{
	m_toASCII[c] = commandIsTerminated(enc) ? enc : enc + "{}";
	m_toEncoding[enc] = c;
}

/**
 * @return true if @p command does not end in a backslash followed by letters,
 *         by a double quote or by a single quote.
 */
bool ConvertMap::commandIsTerminated(const TQString & command)
{
	static TQRegExp reCommandSequences("\\\\([a-zA-Z]+|\\\"|\\')$");

	return ( reCommandSequences.search(command) == -1 );
}

/**
 * Reads the resource "encodings/<encoding()>.enc" and registers a pair for
 * every line of the form "<text>:<command>"; the first character of <text> is
 * the mapped character.
 *
 * @return true if the file could be opened, false otherwise.
 */
bool ConvertMap::load()
{
	static TQRegExp reMap("^(.*):(.*)");

	//makeMap(encoding());

	//if map already exists, replace it
	TQFile qf(TDEGlobal::dirs()->findResource("appdata","encodings/" + encoding() + ".enc"));

	if ( !qf.open(IO_ReadOnly) )
		return false;

	TQTextStream stream( &qf );
	TQTextCodec *codec = TQTextCodec::codecForName(isoName().ascii());
	if ( codec )
		stream.setCodec(codec);

	while ( !stream.atEnd() )
	{
		//parse the line
		if ( stream.readLine().find(reMap) != -1 )
			addPair(reMap.cap(1)[0], reMap.cap(2));
	}
	qf.close();

	return true;
}

//BEGIN ConvertIO classes

/** Starts at line 0 of @p doc with empty line and text buffers. */
ConvertIO::ConvertIO(Kate::Document *doc) :
	m_doc(doc),
	m_text(TQString()),
	m_line(TQString()),
	m_nLine(0)
{
}

/** @return the line fetched by the most recent nextLine(). */
TQString & ConvertIO::currentLine()
{
	return m_line;
}

/** Fetches the next line of the document and advances the line counter. */
void ConvertIO::nextLine()
{
	m_line = m_doc->textLine(m_nLine++);
}

/** Replaces the text of the document with the accumulated text. */
void ConvertIO::writeText()
{
	m_doc->setText(m_text);
}

/** @return the line counter, i.e. the number of nextLine() calls so far. */
uint ConvertIO::current()
{
	return m_nLine;
}

/**
 * @return true once every line of the document has been fetched.
 *
 * Uses >= rather than ==: ConvertBase::convert() fetches a line before it asks
 * done(), so for a document reporting zero lines the counter is already past
 * numLines() and an equality test would never become true.
 */
bool ConvertIO::done()
{
	return current() >= m_doc->numLines();
}

/** Like ConvertIO, but writeText() stores the result in the file at @p url. */
ConvertIOFile::ConvertIOFile(Kate::Document *doc, const KURL & url) :
	ConvertIO(doc),
	m_url(url)
{
}

/**
 * Writes the accumulated text to the file at m_url.path(); emits a warning if
 * the file cannot be opened for writing.
 */
void ConvertIOFile::writeText()
{
	TQFile qf(m_url.path());
	if ( qf.open(IO_WriteOnly) )
	{
		//write the file
		TQTextStream stream( &qf );
		stream << m_text;
		qf.close();
	}
	else
		kdWarning() << "Could not open " << m_url.path() << endl;
}

//END ConvertIO classes

//BEGIN ConvertBase

/** Stores the encoding name and the I/O object; the map is set by setMap(). */
ConvertBase::ConvertBase(const TQString & encoding, ConvertIO * io) :
	m_io(io),
	m_encoding(encoding),
	m_map(0L)
{
}

/**
 * Default mapping: returns the character at position @p i of the current line
 * unchanged and advances @p i by one.
 */
TQString ConvertBase::mapNext(uint &i)
{
	return TQString(m_io->currentLine()[i++]);
}

/**
 * Runs the conversion: every line is fetched from the I/O object, mapped piece
 * by piece through mapNext() and appended to the I/O object's text buffer, with
 * '\n' between lines; finally the result is written back through writeText().
 *
 * @return false if no map is available for the encoding, true otherwise.
 */
bool ConvertBase::convert()
{
	if ( !setMap() )
		return false;

	m_io->text() = TQString();
	do
	{
		m_io->nextLine();

		uint i = 0;
		while ( i < m_io->currentLine().length() )
			m_io->text() += mapNext(i);

		// no newline after the last line
		if ( !m_io->done() )
			m_io->text() += '\n';
	}
	while ( !m_io->done() );

	m_io->writeText();
	return true;
}

/**
 * Looks up (creating it if necessary) the map for m_encoding.
 *
 * @return true if m_map is usable afterwards.
 */
bool ConvertBase::setMap()
{
	//create map (or use existing)
	if ( ConvertMap::create(m_encoding) )
		m_map = ConvertMap::mapFor(m_encoding);
	else
		m_map = 0L;

	return ( m_map != 0L );
}

//END ConvertBase

//BEGIN ConvertEncToASCII

/**
 * Returns the map's ASCII replacement for the character at position @p i if the
 * map can decode it, the character itself otherwise. Advances @p i by one.
 */
TQString ConvertEncToASCII::mapNext(uint &i)
{
	const TQChar c = m_io->currentLine()[i++];

	return m_map->canDecode(c) ? m_map->toASCII(c) : TQString(c);
}

//END ConvertEncToASCII

//BEGIN ConvertASCIIToEnc

/**
 * Extracts one backslash sequence from the current line.
 *
 * @param i position of the '\'; on return it points just behind the sequence.
 * @return the backslash followed either by a run of letters or by exactly one
 *         non-letter character. A backslash that is the last character of the
 *         line is returned on its own.
 */
TQString ConvertASCIIToEnc::nextSequence(uint &i)
{
	const TQString & line = m_io->currentLine();
	const uint length = line.length();

	//the backslash itself
	TQString seq(line[i++]);

	// Nothing follows the backslash: do not read past the end of the line,
	// which used to append a NUL character to the result.
	if ( i >= length )
		return seq;

	if ( line[i].isLetter() )
	{
		while ( i < length && line[i].isLetter() )
			seq += line[i++];
	}
	else
		seq += line[i++];

	return seq;
}

/**
 * @return true if @p seq is exactly a backslash followed by one of
 *         c H k r u v " ' ^ ` ~ = .
 */
bool ConvertASCIIToEnc::isModifier(const TQString & seq)
{
	static TQRegExp reModifier("\\\\([cHkruv]|\"|\'|\\^|`|~|=|\\.)");

	return reModifier.exactMatch(seq);
}

/**
 * Extracts the complete sequence starting at the '\' at position @p i,
 * including the argument of a modifier, and advances @p i behind everything
 * that was consumed.
 *
 * For a modifier, whitespace and an empty "{}" after it are skipped; the
 * argument is then either another backslash sequence, the content of a
 * "{x}" / "{}" group, or the next character (dropped if it is whitespace).
 * A modifier that ends in a letter is separated from its argument by a space.
 * For a non-modifier sequence the map can encode, a directly following "{}" or
 * a single whitespace character is consumed.
 */
TQString ConvertASCIIToEnc::getSequence(uint &i)
{
	static TQRegExp reBraces("\\{([a-zA-Z]?)\\}");

	const TQString & line = m_io->currentLine();
	TQString seq = nextSequence(i);

	if ( isModifier(seq) )
	{
		KILE_DEBUG() << "\tisModifier true : " << seq << endl;

		if ( seq[seq.length() - 1].isLetter() )
			seq += ' ';

		while ( line[i].isSpace() )
			i++;

		if ( line.mid(i,2) == "{}" )
			i = i + 2;

		if ( line[i] == '\\' )
			seq += nextSequence(i);
		else if ( reBraces.exactMatch(line.mid(i,3)) )
		{
			KILE_DEBUG() << "\tbraces detected" << endl;
			i = i + 3;
			seq += reBraces.cap(1);
		}
		else if ( i < line.length() )
		{
			// Only take an argument character that really exists; at the end
			// of the line there is none (a NUL used to be appended here).
			const TQChar nextChar = line[i++];
			if ( !nextChar.isSpace() )
				seq += nextChar;
		}
	}
	else if ( m_map->canEncode(seq) )
	{
		if ( line.mid(i,2) == "{}" )
			i = i + 2;
		else if ( line[i].isSpace() )
			++i;
	}

	return seq;
}

/**
 * If the character at position @p i is a '\', extracts the sequence starting
 * there and returns its encoded form when the map knows it, the sequence itself
 * otherwise. Any other character is handled by ConvertBase::mapNext().
 */
TQString ConvertASCIIToEnc::mapNext(uint &i)
{
	if ( m_io->currentLine()[i] != '\\' )
		return ConvertBase::mapNext(i);

	TQString seq = getSequence(i);
	KILE_DEBUG() << "'\tsequence: " << seq << endl;

	if ( m_map->canEncode(seq) )
	{
		KILE_DEBUG() << "\tcan encode this" << endl;
		//if ( m_io->currentLine().mid(i, 2) == "{}" ) i = i + 2;
		return m_map->toEncoding(seq);
	}

	return seq;
}

//END ConvertASCIIToEnc