|
|
|
/* ****************************************************************************
  This file is part of KBabel

  Copyright (C) 2001 by Matthias Kiefer
                            <matthias.kiefer@gmx.de>

  This program is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation; either version 2 of the License, or
  (at your option) any later version.

  This program is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with this program; if not, write to the Free Software
  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.

  In addition, as a special exception, the copyright holders give
  permission to link the code of this program with any edition of
  the TQt library by Trolltech AS, Norway (or with modified versions
  of TQt that use the same license as TQt), and distribute linked
  combinations including the two.  You must obey the GNU General
  Public License in all respects for all of the code used other than
  TQt. If you modify this file, you may extend this exception to
  your version of the file, but you are not obligated to do so.  If
  you do not wish to do so, delete this exception statement from
  your version.

**************************************************************************** */
|
|
|
|
#include "compendiumdata.h"
|
|
|
|
|
|
|
|
#include <resources.h>
|
|
|
|
#include <catalog.h>
|
|
|
|
#include <tagextractor.h>
|
|
|
|
|
|
|
|
#include <tdeapplication.h>
|
|
|
|
#include <kdebug.h>
|
|
|
|
#include <tdelocale.h>
|
|
|
|
|
|
|
|
using namespace KBabel;
|
|
|
|
|
|
|
|
/**
 * Creates an empty compendium data object: not active, no error, not
 * initialized and with empty lookup dictionaries.
 *
 * @param parent passed on to the TQObject base class
 */
CompendiumData::CompendiumData(TQObject *parent)
    : TQObject(parent)
    , _active(false), _error(false), _initialized(false)
    , _catalog(0)
    // 9887 is the size argument handed to each dictionary's constructor
    , _exactDict(9887), _allDict(9887), _wordDict(9887), _textonlyDict(9887)
{
    // Every dictionary deletes the values it stores when they are
    // removed or the dictionary is cleared.
    _textonlyDict.setAutoDelete(true);
    _wordDict.setAutoDelete(true);
    _allDict.setAutoDelete(true);
    _exactDict.setAutoDelete(true);

    // The catalog receives this object as its first constructor argument
    // (presumably the TQObject parent - confirm against Catalog's header).
    _catalog = new Catalog(this, "CompendiumData::catalog", TQString());
}
|
|
|
|
|
|
|
|
|
|
|
|
bool CompendiumData::load(KURL url)
|
|
|
|
{
|
|
|
|
if(_active)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
|
|
|
|
_error = false;
|
|
|
|
_active = true;
|
|
|
|
|
|
|
|
_exactDict.clear();
|
|
|
|
_allDict.clear();
|
|
|
|
_wordDict.clear();
|
|
|
|
_textonlyDict.clear();
|
|
|
|
|
|
|
|
|
|
|
|
emit progressStarts(i18n("Loading PO compendium"));
|
|
|
|
connect(_catalog, TQ_SIGNAL(signalProgress(int)), this, TQ_SIGNAL(progress(int)));
|
|
|
|
|
|
|
|
ConversionStatus stat=_catalog->openURL(url);
|
|
|
|
|
|
|
|
disconnect(_catalog, TQ_SIGNAL(signalProgress(int))
|
|
|
|
, this, TQ_SIGNAL(progress(int)));
|
|
|
|
|
|
|
|
|
|
|
|
if( stat!= OK && stat != RECOVERED_PARSE_ERROR)
|
|
|
|
{
|
|
|
|
kdDebug(KBABEL_SEARCH) << "error while opening file " << url.prettyURL() << endl;
|
|
|
|
|
|
|
|
_error = true;
|
|
|
|
_errorMsg = i18n("Error while trying to read file for PO Compendium module:\n%1")
|
|
|
|
.arg(url.prettyURL());
|
|
|
|
|
|
|
|
emit progressEnds();
|
|
|
|
|
|
|
|
_active = false;
|
|
|
|
_initialized=true;
|
|
|
|
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
emit progressStarts(i18n("Building indices"));
|
|
|
|
|
|
|
|
int total = _catalog->numberOfEntries();
|
|
|
|
for(int i=0; i < total; i++)
|
|
|
|
{
|
|
|
|
if( (100*(i+1))%total < 100 )
|
|
|
|
{
|
|
|
|
emit progress((100*(i+1))/total);
|
|
|
|
kapp->processEvents(100);
|
|
|
|
}
|
|
|
|
|
|
|
|
// FIXME: shoudl care about plural forms
|
|
|
|
TQString temp = _catalog->msgid(i,true).first();
|
|
|
|
|
|
|
|
int *index = new int(i);
|
|
|
|
_exactDict.insert(temp,index);
|
|
|
|
|
|
|
|
|
|
|
|
temp = simplify(temp);
|
|
|
|
temp = temp.lower();
|
|
|
|
|
|
|
|
if(!temp.isEmpty() && temp.length() > 1)
|
|
|
|
{
|
|
|
|
// add to allDict
|
|
|
|
TQValueList<int> *indexList=_allDict[temp];
|
|
|
|
|
|
|
|
if(!indexList)
|
|
|
|
{
|
|
|
|
indexList = new TQValueList<int>;
|
|
|
|
_allDict.insert(temp,indexList);
|
|
|
|
}
|
|
|
|
|
|
|
|
indexList->append(i);
|
|
|
|
|
|
|
|
// add to textonlyDict
|
|
|
|
TQString temp1 = temp;
|
|
|
|
temp1.remove( ' ' );
|
|
|
|
|
|
|
|
indexList=_textonlyDict[temp1];
|
|
|
|
|
|
|
|
if(!indexList)
|
|
|
|
{
|
|
|
|
indexList = new TQValueList<int>;
|
|
|
|
_textonlyDict.insert(temp1,indexList);
|
|
|
|
kdDebug() << "Adding " << temp1 << endl;
|
|
|
|
}
|
|
|
|
|
|
|
|
indexList->append(i);
|
|
|
|
|
|
|
|
// add to wordDict
|
|
|
|
TQStringList wList = wordList(temp);
|
|
|
|
for ( TQStringList::Iterator it = wList.begin()
|
|
|
|
; it != wList.end(); ++it )
|
|
|
|
{
|
|
|
|
if( (*it).length() > 1)
|
|
|
|
{
|
|
|
|
indexList=_wordDict[*it];
|
|
|
|
|
|
|
|
if(!indexList)
|
|
|
|
{
|
|
|
|
indexList = new TQValueList<int>;
|
|
|
|
_wordDict.insert(*it,indexList);
|
|
|
|
}
|
|
|
|
|
|
|
|
indexList->append(i);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// remove words, that are too frequent
|
|
|
|
uint max=_allDict.count()/10;
|
|
|
|
TQDictIterator< TQValueList<int> > it(_wordDict);
|
|
|
|
while ( it.current() )
|
|
|
|
{
|
|
|
|
if(it.current()->count() > max)
|
|
|
|
{
|
|
|
|
_wordDict.remove(it.currentKey());
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
++it;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
_initialized=true;
|
|
|
|
|
|
|
|
emit progressEnds();
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
_active = false;
|
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
const int* CompendiumData::exactDict(const TQString text) const
|
|
|
|
{
|
|
|
|
return _exactDict[text];
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
 * Looks up @p text in the dictionary of normalized message texts.
 *
 * @param text key to look up; load() stores simplified, lower-cased
 *             msgids here
 * @return list of catalog indices stored for the key, or 0 if none
 */
const TQValueList<int>* CompendiumData::allDict(const TQString text) const
{
    const TQValueList<int> *indices = _allDict.find(text);
    return indices;
}
|
|
|
|
|
|
|
|
/**
 * Looks up @p text in the per-word dictionary.
 *
 * @param text key to look up; load() stores the single words of the
 *             normalized msgids here
 * @return list of catalog indices stored for the word, or 0 if none
 */
const TQValueList<int>* CompendiumData::wordDict(const TQString text) const
{
    const TQValueList<int> *indices = _wordDict.find(text);
    return indices;
}
|
|
|
|
|
|
|
|
/**
 * Looks up @p text in the text-only dictionary.
 *
 * @param text key to look up; load() stores the normalized msgids with
 *             all spaces removed here
 * @return list of catalog indices stored for the key, or 0 if none
 */
const TQValueList<int>* CompendiumData::textonlyDict(const TQString text) const
{
    const TQValueList<int> *indices = _textonlyDict.find(text);
    return indices;
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Adds @p obj to the list of registered objects unless that very pointer
 * is already in the list.
 *
 * @param obj object to register
 */
void CompendiumData::registerObject(TQObject *obj)
{
    // Already registered: keep the list free of duplicates
    if(_registered.containsRef(obj))
        return;

    _registered.append(obj);
}
|
|
|
|
|
|
|
|
/**
 * Removes @p obj from the list of registered objects.
 *
 * @param obj object to unregister
 * @return true if no registered objects are left afterwards
 */
bool CompendiumData::unregisterObject(TQObject *obj)
{
    _registered.removeRef(obj);

    const bool nothingLeft = _registered.isEmpty();
    return nothingLeft;
}
|
|
|
|
|
|
|
|
bool CompendiumData::hasObjects() const
|
|
|
|
{
|
|
|
|
return _registered.count()==0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
 * Normalizes a message string: takes the plain string produced by
 * TagExtractor for @p string, then simplifies and strips its white space.
 *
 * @param string text to normalize
 * @return the normalized text
 */
TQString CompendiumData::simplify(const TQString string)
{
    TagExtractor extractor;
    extractor.setString(string);

    TQString plain = extractor.plainString();

    return plain.simplifyWhiteSpace().stripWhiteSpace();
}
|
|
|
|
|
|
|
|
/**
 * Splits @p string into words: the text is normalized with simplify()
 * and then split at single spaces.
 *
 * @param string text to split
 * @return the words of the normalized text
 */
TQStringList CompendiumData::wordList(const TQString string)
{
    return TQStringList::split(' ', CompendiumData::simplify(string));
}
|
|
|
|
|
|
|
|
#include "compendiumdata.moc"
|