You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
204 lines
4.1 KiB
204 lines
4.1 KiB
//
|
|
// C++ Implementation: chunk
|
|
//
|
|
// Description:
|
|
//
|
|
//
|
|
// Author: Andrea Rizzi <rizzi@kde.org>, (C) 2003
|
|
//
|
|
// Copyright: See COPYING file that comes with this distribution
|
|
//
|
|
//
|
|
#include "chunk.h"
|
|
#include "algorithms.h"
|
|
#include <kdebug.h>
|
|
|
|
|
|
|
|
// Out-of-line destructor of the chunk base class; its body is intentionally empty.
AbstractChunk::~AbstractChunk()
{}
|
|
|
|
// Builds a chunk for a single word.
//   di    - database interface, forwarded to the AbstractChunk constructor
//   _word - the word this chunk stores
WordChunk::WordChunk( DataBaseInterface * di, TQString _word )
    : AbstractChunk(di)
{
    this->word = _word;
}
|
|
|
|
// Runs a SingleWordSearchAlgorithm (set up with the database interface and
// its settings) on the stored word and returns whatever it produces.
TQValueList<QueryResult> WordChunk::translations( )
{
    SingleWordSearchAlgorithm wordSearch(di, di->getSettings());

    DataBaseInterface::ResultList found;
    found = wordSearch.exec(word);
    return found;
}
|
|
|
|
//TQValueList<QueryResult> WordChunk::translationsFromReference( uint reference )
|
|
//{
|
|
//}
|
|
|
|
// Returns the list obtained from the database interface's word index for
// the stored word, logging how many entries it contains.
TQValueList< uint > WordChunk::locationReferences( )
{
    TQValueList<uint> references = di->getWordIndex(word);

    kdDebug(0) << "Number of locations " << references.count() << endl;

    return references;
}
|
|
|
|
// Accepts a list of location references and ignores it: the body is empty.
void WordChunk::setLocationReferences( TQValueList< uint > )
{}
|
|
|
|
// Builds a chunk for a whole sentence.
//   di        - database interface, forwarded to the AbstractChunk constructor
//   _sentence - the sentence this chunk stores
SentenceChunk::SentenceChunk( DataBaseInterface * di, TQString _sentence )
    : AbstractChunk(di)
{
    this->sentence = _sentence;
}
|
|
|
|
// Searches for the stored sentence with a GenericSearchAlgorithm to which the
// exact, alpha and sentence-archive algorithms are added, in that order.
TQValueList<QueryResult> SentenceChunk::translations( )
{
    // The aggregate is declared first so that it is destroyed last, i.e.
    // after the algorithms whose addresses it was handed.
    GenericSearchAlgorithm combined(di, di->getSettings());

    ExactSearchAlgorithm exactSearch(di, di->getSettings());
    AlphaSearchAlgorithm alphaSearch(di, di->getSettings());
    SentenceArchiveSearchAlgorithm archiveSearch(di, di->getSettings());

    combined.addAlgorithm(&exactSearch);
    combined.addAlgorithm(&alphaSearch);
    combined.addAlgorithm(&archiveSearch);

    return combined.exec(sentence);
}
|
|
|
|
//TQValueList<QueryResult> SentenceChunk::translationsFromReference( uint reference )
|
|
//{
|
|
//
|
|
//}
|
|
|
|
// Location references are not implemented for sentence chunks, so an empty
// list is returned.
//
// The previous body was empty: control fell off the end of a function with a
// non-void return type, which is undefined behaviour. Returning an explicit
// empty list gives callers a well-defined result.
TQValueList< uint > SentenceChunk::locationReferences( )
{
    return TQValueList<uint>();
}
|
|
|
|
// Accepts a list of location references and ignores it: the body is empty.
void SentenceChunk::setLocationReferences( TQValueList< uint > )
{}
|
|
|
|
// Splits the factory's string (after passing it through di->simple()) into
// one WordChunk per run of non-whitespace characters.
//
// Returns the newly allocated chunks in order of appearance. As a side effect
// _separators is rebuilt so that entry N holds the whitespace that followed
// chunk N (empty for a final chunk with nothing after it).
TQPtrList< AbstractChunk> WordChunkFactory::chunks()
{
    TQPtrList<AbstractChunk> list;

    // Reset before the early return as well, so an empty input never leaves
    // separators from a previous call behind.
    _separators.clear();

    TQString str=di->simple(string);
    if(str.isEmpty()) return list;

    kdDebug(0) << "Word chunks of:" <<str << endl;

    TQRegExp r("(\\s)");
    int pos;
    do {
        pos=r.search(str);
        const TQString sep=r.cap(1);   // empty when nothing matched

        // No separator left (pos == -1): the whole remainder is the last word.
        const TQString head=(pos<0) ? str : str.left(pos);

        if(!head.isEmpty())
        {
            list.append(new WordChunk(di,head));
            _separators.append(sep);
        }
        else if(_separators.count()>0)
        {
            // Consecutive separators: glue this one onto the previous entry.
            uint current=_separators.count()-1;
            _separators[current]=_separators[current]+sep;
        }
        // else: a separator before the first word. There is no chunk to
        // attach it to, so it is dropped; the old code computed index
        // count()-1 on an empty list here and indexed out of range.

        if(pos<0) break;
        str.remove(0,pos+1);
    } while(!str.isEmpty());

    return list;
}
|
|
|
|
|
|
|
|
// Splits the factory's string into SentenceChunks. A sentence ends at one of
// the characters . ; ? ! : when it is followed by a space, the end of the
// string, or a literal backslash-n followed by a newline.
//
// Returns the newly allocated chunks (text stripped of surrounding white
// space) in order of appearance. As a side effect _separators is rebuilt so
// that entry N holds the terminator text that followed chunk N.
TQPtrList<AbstractChunk> SentenceChunkFactory::chunks()
{
    TQPtrList<AbstractChunk> list;

    // Start from a clean separator list on every call, as
    // WordChunkFactory::chunks() does; otherwise repeated calls would keep
    // appending and the separators would no longer line up with the chunks.
    _separators.clear();

    TQString str=string;
    if(str.isEmpty()) return list;

    // The pattern never changes, so build it once instead of per iteration.
    TQRegExp re("((\\.|;|\\?|\\!|:)( |$|\\\\n\\n))");
    int pos;
    do {
        pos=re.search(str);
        const TQString sep=re.cap(1);   // empty when nothing matched

        // No terminator left (pos == -1): the remainder is the last sentence.
        const TQString head=(pos<0) ? str : str.left(pos);

        if(!head.isEmpty())
        {
            list.append(new SentenceChunk(di,head.stripWhiteSpace()));
            _separators.append(sep);
        }
        else if(_separators.count()>0)
        {
            // Consecutive terminators: glue this one onto the previous entry.
            uint current=_separators.count()-1;
            _separators[current]=_separators[current]+sep;
        }
        // else: a terminator before the first sentence. There is no chunk to
        // attach it to, so it is dropped; the old code computed index
        // count()-1 on an empty list here and indexed out of range.

        if(pos<0) break;
        // A match always contains the terminator character, so this removes
        // at least one character and the loop makes progress.
        str.remove(0,pos+sep.length());
    } while(!str.isEmpty());

    return list;
}
|
|
// Splits the factory's string wherever the "case class" of consecutive
// characters changes. A character counts as upper when it equals its own
// upper() form, so digits and punctuation fall in the upper class too.
//
// Returns one newly allocated WordChunk per run, in order of appearance. As a
// side effect _separators is rebuilt with one empty separator per chunk.
TQPtrList< AbstractChunk > CaseBasedWordChunkFactory::chunks( )
{
    TQPtrList<AbstractChunk> list;

    // Start from a clean separator list on every call, as
    // WordChunkFactory::chunks() does.
    _separators.clear();

    TQString str=string;
    if(str.isEmpty()) return list;

    const uint slen=str.length();
    kdDebug(0) << "CaseWordChunk string:" << str << endl;

    TQString tmpWord;
    // Seed with the class of the first character. The old code read this flag
    // before it was ever assigned.
    bool upcase=(str[0]==str[0].upper());

    for(uint i=0;i<slen;i++)
    {
        const bool tmpCase=(str[i]==str[i].upper());
        if(upcase!=tmpCase)
        {
            if(!tmpWord.isEmpty())
            {
                list.append(new WordChunk(di,tmpWord));
                _separators.append("");
            }
            kdDebug(0) << "CaseWordChunk:" << tmpWord << endl;
            tmpWord="";
        }
        tmpWord+=str[i];
        upcase=tmpCase;
    }

    // Flush the final run explicitly. The old loop ran to i<=slen and relied
    // on the past-the-end null character to trigger the flush; that only
    // worked when the last run was lower case, so a trailing upper-class run
    // was silently lost.
    if(!tmpWord.isEmpty())
    {
        list.append(new WordChunk(di,tmpWord));
        _separators.append("");
        kdDebug(0) << "CaseWordChunk:" << tmpWord << endl;
    }

    return list;
}
|
|
|
|
// Forwards the database interface to the AbstractChunkFactory constructor;
// no further initialisation is done here.
WordChunkFactory::WordChunkFactory( DataBaseInterface * _di )
    : AbstractChunkFactory(_di)
{}
|
|
|
|
// Forwards the database interface to the AbstractChunkFactory constructor;
// no further initialisation is done here.
SentenceChunkFactory::SentenceChunkFactory( DataBaseInterface * _di )
    : AbstractChunkFactory(_di)
{}
|
|
|
|
// Forwards the database interface to the AbstractChunkFactory constructor;
// no further initialisation is done here.
CaseBasedWordChunkFactory::CaseBasedWordChunkFactory( DataBaseInterface * _di )
    : AbstractChunkFactory(_di)
{}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|