|
|
|
/*
|
|
|
|
|
|
|
|
DBSE 3
|
|
|
|
(c) 2000-2003 Andrea Rizzi
|
|
|
|
License: GPLv2
|
|
|
|
|
|
|
|
*/
|
|
|
|
#include <math.h>
|
|
|
|
#include "database.h"
|
|
|
|
|
|
|
|
#include <tqregexp.h>
|
|
|
|
#include <tqdict.h>
|
|
|
|
#include <tdeapplication.h>
|
|
|
|
#include <kdebug.h>
|
|
|
|
#include <tdemessagebox.h>
|
|
|
|
|
|
|
|
// Local stand-in for i18n(): the macro expands to a plain C-style cast, so
// i18n("text") becomes (const char*)("text") and the literal is used as-is.
#define i18n (const char*)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * Prepares a database handle; nothing is opened here (see open()).
 *
 * dbpath  path prefix of the database file
 * dbname  name of the database inside that file
 * dblang  language code, becomes part of the file name
 *
 * The underlying Db is constructed with no environment (0) and with the
 * DB_CXX_NO_EXCEPTIONS flag.
 */
DataBase::DataBase(TQString dbpath,TQString dbname, TQString dblang) : Db(0,DB_CXX_NO_EXCEPTIONS)
{
    // File name layout: <dbpath>.<dblang>.db
    TQString path(dbpath);
    path+=".";
    path+=dblang;
    path+=".db";

    filename=path;
    database=dbname;
}
|
|
|
|
|
|
|
|
/*
 * Opens the database configured in the constructor.
 *
 * type   access method handed to Db::open()
 * flags  open flags handed to Db::open()
 *
 * Returns whatever Db::open() returned. The requested type is remembered in
 * mytype whether or not the open succeeded.
 */
int DataBase::open(DBTYPE type,unsigned int flags)
{
    // Newer Berkeley DB releases take a leading transaction argument.
    const int status = Db::open(
#if (DB_VERSION_MAJOR == 4 && DB_VERSION_MINOR > 0) || (DB_VERSION_MAJOR >= 5)
        NULL,
#endif
        (const char*)filename.local8Bit(),
        (const char *)database.local8Bit(),
        type,
        flags,
        0644);

    mytype=type;
    return status;
}
|
|
|
|
|
|
|
|
unsigned int DataBase::getLast()
|
|
|
|
{
|
|
|
|
if(mytype!=DB_RECNO)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
Dbc *cur;
|
|
|
|
cursor(0,&cur,0);
|
|
|
|
DBItemNum index;
|
|
|
|
DBItemMainKey key;
|
|
|
|
cur->get(&index,&key,DB_LAST);
|
|
|
|
return index.getNum();
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * Builds a result that carries only the result string r.
 * The score is set to 0 so that it never holds an indeterminate value.
 */
QueryResult::QueryResult(TQString r)
{
    res=r;
    sco=0;
}
|
|
|
|
/*
 * Builds a complete result.
 *
 * r  result string, stored both as res and as richr
 * o  original string, stored both as orig and as richo
 * s  score
 */
QueryResult::QueryResult(TQString r,TQString o,int s)
{
    // original text and its "rich" twin
    orig=o;
    richo=o;

    // result text and its "rich" twin
    res=r;
    richr=r;

    sco=s;
}
|
|
|
|
|
|
|
|
/*
 * Builds an empty result: empty result string and a score of 0 (the score
 * used to be left uninitialised).
 */
QueryResult::QueryResult()
{
    res="";
    sco=0;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * Opens the seven databases used by the search engine inside directory dir
 * and stores the settings pointer.
 *
 * dir   directory that holds the database files
 * sets  settings object; only the pointer is kept
 *
 * Each handle comes from openMyDataBase(), so any of them may be 0.
 */
DataBaseInterface::DataBaseInterface(TQString dir, DBSESettings *sets)
{
    //FIXME Better db names!!
    const TQString lang("it");
    const TQString base=dir+"/test";

    // Keep this order: openMyDataBase() may ask the user a question the
    // first time a database is missing.
    main       = openMyDataBase(base+"m","main",lang,DB_BTREE);
    alpha      = openMyDataBase(base+"a","alpha",lang,DB_BTREE);
    numindex   = openMyDataBase(base+"n","numindex",lang,DB_RECNO);
    wordsindex = openMyDataBase(base+"w","wordsindex",lang,DB_BTREE);
    sentence   = openMyDataBase(base+"s","sentence",lang,DB_BTREE);
    corr       = openMyDataBase(base+"c","corr",lang,DB_BTREE);
    transword  = openMyDataBase(base+"t","transword",lang,DB_RECNO);

    settings=sets;
    _stopNow=false;
}
|
|
|
|
|
|
|
|
/*
 * Closes and destroys every database handle obtained in the constructor.
 *
 * corr and transword are opened by the constructor as well, but were
 * previously neither closed nor deleted here; they are now released like
 * the other handles. Null handles are skipped.
 */
DataBaseInterface::~DataBaseInterface()
{
    if(main){
        main->close(0);
        delete main;
    }
    if(numindex){
        numindex->close(0);
        delete numindex;
    }
    if(alpha){
        alpha->close(0);
        delete alpha;
    }
    if(wordsindex){
        wordsindex->close(0);
        delete wordsindex;
    }
    if(sentence){
        sentence->close(0);
        delete sentence;
    }
    if(corr){
        corr->close(0);
        delete corr;
    }
    if(transword){
        transword->close(0);
        delete transword;
    }
}
|
|
|
|
|
|
|
|
/*
 * Opens one database, offering to create it when it cannot be opened.
 *
 * prefix  path prefix of the database file
 * name    database name
 * l       language code
 * tt      access method
 *
 * Returns the handle on success and 0 when creation was attempted and
 * failed. When the user declines creation the handle that failed to open
 * is still returned, as before, because callers use the handles without
 * null checks.
 * NOTE(review): that handle is not open; consider returning 0 here once the
 * callers check for it.
 *
 * The user is asked only once per process (function-local static); the
 * answer is reused for every later missing database.
 */
DataBase *DataBaseInterface::openMyDataBase(const TQString& prefix,const TQString& name,const TQString& l,DBTYPE tt)
{
    DataBase *aDb = new DataBase(prefix,name,l);

    if(aDb->open(tt)==0)
        return aDb;

    kdDebug(0) << "Database '"<< name <<"'do not exist, I try to create it.." << endl;

    //ask only the first time.
    static bool create=( KMessageBox::questionYesNo(0,"Database do not exist. Do you want to create it now?",
                         i18n("Create Database"), i18n("Create"), i18n("Do Not Create"))==KMessageBox::Yes);

    if(!create)
        return aDb;

    // A Db handle whose open() failed must be discarded with close() and
    // must not be opened again, so retry on a fresh handle.
    aDb->close(0);
    delete aDb;
    aDb = new DataBase(prefix,name,l);

    if(aDb->open(tt,DB_CREATE)!=0)
    {
        kdDebug(0) << "...cannot create!!"<< endl;
        // Release the handle instead of leaking it.
        aDb->close(0);
        delete aDb;
        return 0;
    }

    kdDebug(0) << "...done!" << endl;
    return aDb;
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* query functions.
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * Looks up query in the main database and returns the (key, data) pair.
 *
 * The result of the lookup is not checked; when nothing is found the data
 * part is whatever a default-constructed DBItemMainData holds.
 * filter is currently unused.
 *
 * On every fifth call the application event loop is given a chance to run.
 */
DataBaseInterface::MainEntry DataBaseInterface::get(const TQString& query,SearchFilter *filter)
{
    static int counter=1;
    const bool yieldToEventLoop=(++counter%5==0);

    DBItemMainKey key(query);
    DBItemMainData data;
    main->get(&key,&data);

    if(yieldToEventLoop)
        kapp->processEvents(100);

    return qMakePair(key,data);
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* put functions
|
|
|
|
* *
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * Stores the pair (original, translated) and keeps the secondary indexes
 * up to date.
 *
 * original    source string, used as key of the main database
 * translated  translation to attach; may be empty
 * info        provides the reference (info->ref()) under which the
 *             translation is filed. It is dereferenced whenever translated
 *             is non-empty or the sentence index is updated, so it must not
 *             be null in those cases.
 *
 * For an unknown original a new numeric id is allocated and the numeric,
 * alphabetical and word indexes are extended. The sentence index is updated
 * when both strings split into the same number (more than one) of
 * sentences.
 *
 * Returns true when the final write to the main database succeeded. The
 * results of the index writes are not checked.
 */
bool DataBaseInterface::addEntry(TQString original,TQString translated,InputInfo *info)
{
    DBItemMainKey mk(original);
    DBItemMainData md;
    TQMap<TQString, int> correlationDiff;
    bool newentry=false;

    kdDebug(0) << "Inserting the pair:" << endl;
    kdDebug(0) << "ORIGINAL:" << original << endl;
    kdDebug(0) << "TRANSLATED:" << translated << endl;

    if(main->get(&mk,&md)==DB_NOTFOUND)
    {
        kdDebug(0) << "new entry" << endl;
        newentry=true;

        // New entry: register it in the numeric index under the next free id.
        int newid=numindex->getLast()+1;
        DBItemNum nind(newid);
        numindex->put(&nind,&mk);

        md.clear();
        md.setIndexnumber(newid);

        // Secondary index "alpha", keyed by the simplified original.
        DBItemMainKey ka(simple(original));
        DBItemMultiIndex in;
        if(alpha->get(&ka,&in)==DB_NOTFOUND)
            in.clear();
        in.addEntry(newid);
        alpha->put(&ka,&in);

        kdDebug(0) << "Updating the word index " << endl;
        // Word index: one record per word of the original.
        TQStringList ws=words(original);
        for(TQStringList::iterator wordIt=ws.begin(); wordIt!=ws.end(); ++wordIt)
        {
            DBItemMainKey word(*wordIt);
            DBItemMultiIndex win;
            if(wordsindex->get(&word,&win)==DB_NOTFOUND)
                win.clear();
            win.addEntry(newid);
            wordsindex->put(&word,&win);
        }

        kdDebug(0) << "new entry preparation DONE" << endl;
    }
    else
    {
        kdDebug(0) << "It exists!" <<endl;
    }

    // Sentence index: only when both sides split into the same number of
    // sentences; single strings are already covered by the main database.
    TQStringList so=sentences(original);
    TQStringList st=sentences(translated);
    if(so.count()==st.count() && st.count() >1 )
    {
        kdDebug(0) << "inside sentence loop" << endl;
        for(uint idx=0; idx<so.count(); ++idx)
        {
            DBItemMainKey sk(so[idx]);
            DBItemMainData sd;
            const bool unknownSentence=(sentence->get(&sk,&sd)==DB_NOTFOUND);
            if(unknownSentence && !newentry)
                kdDebug(0) << "Warning: new sentence for old entry, do we changed sentence definition? " << endl;

            kdDebug(0) << "here alive" << endl;

            // Drop any earlier translation filed under this reference
            // before adding the new one.
            sd.removeRef(info->ref());
            kdDebug(0) << "now alive" << endl;
            sd.addTranslation(st[idx],info->ref());
            kdDebug(0) << "still alive" << endl;

            sentence->put(&sk,&sd);
        }
    }
    kdDebug(0) << "Fuzzy sentence archive updated" << endl;

    // Attach the translation, linked to the reference that describes it.
    if(!translated.isEmpty())
    {
        // Words of the translations present before the change count -1 ...
        // (operator[] inserts 0 for a word that is not in the map yet)
        TQStringList tmpTranslations=md.getTranslations();
        for(TQStringList::iterator trIt=tmpTranslations.begin(); trIt!=tmpTranslations.end(); ++trIt)
        {
            TQStringList wt=words(*trIt);
            for(TQStringList::iterator wIt=wt.begin(); wIt!=wt.end(); ++wIt)
                correlationDiff[*wIt]-=1;
        }

        //clean so that we have only one translation per catalog.
        md.removeRef(info->ref());
        md.addTranslation(translated,info->ref());

        // ... and words of the translations present afterwards count +1.
        tmpTranslations=md.getTranslations();
        for(TQStringList::iterator trIt=tmpTranslations.begin(); trIt!=tmpTranslations.end(); ++trIt)
        {
            TQStringList wt=words(*trIt);
            for(TQStringList::iterator wIt=wt.begin(); wIt!=wt.end(); ++wIt)
                correlationDiff[*wIt]+=1;
        }

        //FIXME: use the correlationDIff map somehow
    }

    //finally put!
    return (main->put(&mk,&md)==0);
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * Placeholder for entry removal: nothing is removed yet.
 *
 * The entry is looked up in the main database, the outcome is ignored and
 * false is always returned.
 */
bool DataBaseInterface::removeEntry(TQString original)
{
    //FIXME implement remove
    // A real implementation has to undo what addEntry() does for a new
    // entry: the numeric, alphabetical, word and sentence index records.
    DBItemMainKey mk(original);
    DBItemMainData md;

    if(main->get(&mk,&md)==DB_NOTFOUND)
    {
        // Unknown entry; nothing to do.
    }

    return false;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * Scores how strongly words found in translations are tied to one word of
 * the originals.
 *
 * word     word to analyse; it is simplified before the lookup
 * filter   passed on to get()
 * notify   currently unused
 * minSign  only scores strictly greater than this value are returned
 *
 * Returns a map from translated word to score; the map is empty when the
 * word is not present in the word index.
 *
 * The key and index objects now live on the stack, and both dictionaries
 * own their counters (auto-delete), so nothing allocated here outlives the
 * call. Previously all of these were leaked.
 */
TQMap<TQString,float> DataBaseInterface::correlation(TQString word,SearchFilter *filter,bool notify, float minSign)
{
    TQDict<unsigned int> res;
    res.setAutoDelete(true);      // frees the accumulators inserted below
    TQMap<TQString, float>final;
    unsigned int background=0;

    const TQString sword=simple(word);
    DBItemMainKey k(sword);
    DBItemMultiIndex d;

    if(wordsindex->get(&k,&d)==DB_NOTFOUND)
        return final;

    DBItemMultiIndex::IndexList il=d.getList();
    kdDebug(0) << il.count()<<endl;

    for(TQValueList<unsigned int>::iterator it=il.begin();it!=il.end();++it)
    {
        // Resolve the entry number to its original string (reuses k).
        numindex->get(*it,&k);

        MainEntry e=get(k.getString(),filter);
        TQStringList trad=e.second.getTranslations();

        // How often the analysed word occurs in this original.
        const unsigned int nocck=words(k.getString()).contains(sword);

        for(TQStringList::iterator it2=trad.begin();it2!=trad.end();++it2)
        {
            TQStringList w=words(*it2);
            unsigned int numWords = w.count()*10+1;
            unsigned int wei=100000/sqrt(numWords);  //weight (is the best one?)

            background+=(numWords-nocck)*wei;

            // Occurrences of each word inside this translation.
            TQDict<uint> count;
            count.setAutoDelete(true);
            for(TQStringList::iterator it1=w.begin();it1!=w.end();++it1)
            {
                uint *ip=count[*it1];
                if(!ip)
                    count.insert(*it1,new uint(1));
                else
                    (*ip)++;
            }

            for(TQStringList::iterator it1=w.begin();it1!=w.end();++it1)
            {
                //add only if same number of entry (it cuts articles)
                if(*(count[*it1])!=nocck)
                    continue;

                uint *ip=res[*it1];
                if(!ip)
                    res.insert(*it1,new uint(wei));
                else
                    (*ip)+=wei;
            }
        }
    }

    unsigned int sqrBG=sqrt((1.0*background+1)/10000);

    for(TQDictIterator<uint> it(res) ; it.current(); ++it)
    {
        float sign=1.0*(*(it.current()))/(10000.0*sqrBG);
        if(sign >minSign){
            final[it.currentKey()]=sign;
            kdDebug(0) << it.currentKey() <<" Score:" << 1.0*(*(it.current()))/10000 << "/" <<sqrBG << " = " <<sign << endl;
        }
    }

    kdDebug(0) << "final count " <<final.count()<< endl;

    return final;
}
|
|
|
|
|
|
|
|
/*
 * Splits a string into words.
 *
 * The string is first normalised with simple() and then cut at every
 * whitespace character; empty pieces are dropped.
 */
TQStringList DataBaseInterface::words(TQString s)
{
    TQStringList list;
    TQString rest=simple(s);
    const TQRegExp whitespace("\\s");

    for(;;)
    {
        const int pos=rest.find(whitespace);

        // With pos == -1 left() yields the whole remainder.
        const TQString token=rest.left(pos);
        if(!token.isEmpty())
            list.append(token);

        rest=rest.remove(0,pos+1);
        if(rest.isEmpty() || pos==-1)
            break;
    }

    return list;
}
|
|
|
|
|
|
|
|
/*
 * Normalises a string for use as index key.
 *
 * str  string to normalise
 * ck   "case keep": when false the string is lower-cased first
 *
 * Steps:
 *  - enclosing tags (<x>...</x>) are replaced by their content;
 *  - letters, digits and '%' are kept;
 *  - whitespace, '-', '\'' and '_' become a single space, but never at the
 *    start and never twice in a row;
 *  - every other character is dropped;
 *  - one trailing space is removed.
 *
 * The trailing space used to be tested at the index of the last character
 * of the unfiltered string, so it survived whenever any character had been
 * dropped; the test now uses the length of the filtered string.
 * NOTE(review): keys written earlier to the alpha index for such strings
 * still carry the trailing space.
 */
TQString DataBaseInterface::simple(TQString str,bool ck)
{
    TQString res;
    if(ck)
        res=str;            //case keep
    else
        res=str.lower();    //lowercase

    //FIXME: uncoment the foll. line (check speed)
    res=res.replace(TQRegExp("(<(.*)>)(.*)(</\\2>)"),"\\3");  //remove enclosing tags

    // Hand-written filter instead of regexps.
    TQString r;
    TQChar c;
    bool wasSpace=true;     // true at the start so leading separators vanish
    const uint len=res.length();
    for(uint i=0; i<len; i++)
    {
        c=res[i];
        if(c.isLetterOrNumber() || c=='%')
        {
            r+=c;
            wasSpace=false;
        }
        else if(!wasSpace && (c.isSpace() || c=='-' || c=='\'' || c=='_'))
        {
            r+=' ';
            wasSpace=true;
        }
        // anything else is dropped
    }

    // At most one trailing space can exist; measure r, not the input.
    if(!r.isEmpty() && r[r.length()-1].isSpace())
        r.truncate(r.length()-1);

    return r;
}
|
|
|
|
|
|
|
|
/*
 * Splits a string into sentences.
 *
 * A sentence ends at one of . ; ? ! : followed by a space, the end of the
 * string, or the three characters backslash, 'n', newline. The terminator
 * is not part of the returned pieces; pieces are stripped of surrounding
 * whitespace and empty pieces are skipped. Every piece is also written to
 * the debug output.
 */
TQStringList DataBaseInterface::sentences(TQString s)
{
    TQStringList list;
    TQString str=s;
    TQRegExp re("((\\.|;|\\?|\\!|:)( |$|\\\\n\\n))");

    int pos;
    do {
        pos=re.search(str);

        // With pos == -1 left() yields the whole remainder.
        const TQString piece=str.left(pos);
        if(!piece.isEmpty())
            list.append(piece.stripWhiteSpace());

        kdDebug(0) << piece << endl;

        // Skip the piece together with its terminator.
        str=str.remove(0,pos+re.cap(1).length());
    } while(!str.isEmpty() && pos != -1);

    return list;
}
|
|
|
|
|
|
|
|
/*
 * Returns the sentence terminators of a string, in order of appearance.
 *
 * A terminator is one of . : ? ! ; followed by a space, the end of the
 * string, or the three characters backslash, 'n', newline; the whole
 * matched text is returned.
 */
TQStringList DataBaseInterface::sentencesSeparator(TQString s)
{
    TQStringList list;
    TQString str=s;
    TQRegExp re("([.:?!;]( |$|\\\\n\\n))");

    for(;;)
    {
        const int pos=re.search(str);
        if(pos==-1)
            break;

        list.append(re.cap(1));

        // Continue right after the punctuation character.
        str=str.remove(0,pos+1);
        if(str.isEmpty())
            break;
    }

    return list;
}
|
|
|
|
|
|
|
|
/*
 * True when upper-casing the character leaves it unchanged. That holds for
 * upper-case letters and for any character upper() does not alter.
 */
bool DataBaseInterface::isUpper(TQChar s)
{
    const bool unchanged=(s==s.upper());
    return unchanged;
}
|
|
|
|
|
|
|
|
/*
 * True when lower-casing the character leaves it unchanged. That holds for
 * lower-case letters and for any character lower() does not alter.
 */
bool DataBaseInterface::isLower(TQChar s)
{
    const bool unchanged=(s==s.lower());
    return unchanged;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * Dresses string _s like the template t.
 *
 * _s  string to format
 * t   template whose appearance is copied
 *
 * Copied features: capitalisation of the first character, all-upper-case,
 * the keyboard accelerator ('&'), and trailing punctuation / enclosing tags
 * (via formatRegExp()).
 *
 * Fixes compared with the previous version:
 *  - the accelerator is searched case-insensitively; the former call
 *    s.find(accel,false) passed false as start index and therefore searched
 *    case-sensitively;
 *  - an empty _s stays empty (assigning to s[0] used to grow it by one
 *    character);
 *  - a template without any word character no longer indexes position -1;
 *  - a '&' at the very end of the template is not treated as accelerator.
 *
 * NOTE(review): TQString::replace() works in place, so t itself loses its
 * enclosing tags in the first statement and noTagT is a copy of the result.
 * Everything below, including the tag-aware formatRegExp() call, therefore
 * sees the stripped template. This is kept as is; confirm whether t was
 * meant to keep its tags.
 */
TQString DataBaseInterface::format(TQString _s,TQString t)
{
    //FIXME use settings
    //FIXME use regexp

    TQString s=_s;
    TQString noTagT=t.replace(TQRegExp("(<(.*)>)(.*)(</\\2>)"),"\\3");

    // First word character of the template; stays a null character when
    // there is none (a null character counts as "upper", as before).
    TQChar first;
    const int firstIdx=noTagT.find(TQRegExp("\\w"));
    if(firstIdx>=0)
        first=noTagT[firstIdx];
    const bool firstCapital=isUpper(first);

    const bool allupper=(t.upper()==t);

    if(!s.isEmpty())
    {
        if(firstCapital)
            s[0]=s[0].upper();
        else
            s[0]=s[0].lower();
    }

    if(allupper)
        s=s.upper();

    // Keyboard accelerator: put '&' in front of the same letter in s, or at
    // the start when s does not contain that letter.
    int pos=t.find('&');
    if(pos>=0 && pos+1<(int)t.length()) {
        const TQChar accel=t[pos+1];
        if(accel!='&')      // "&&" is a literal ampersand
        {
            pos=s.find(accel,0,false);
            if(pos<0)
                pos=0;
            s.insert(pos,"&");
        }
    }

    // Copy trailing punctuation of the template.
    s=formatRegExp(s,t,".*(\\.\\.\\.|:|>>|<<|\\.|\\?)$",
                   "^(.*)$",
                   "\\1@CAP1@");
    // Copy enclosing tags of the template.
    s=formatRegExp(s,t,"(<(.*)>).*(\\.\\.\\.|:|>>|<<|\\.|\\?)*(</\\2>)$",
                   "^(.*)$",
                   "@CAP1@\\1@CAP3@@CAP4@");

    return s;
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * Rewrites _s with a search/replace pair that is parameterised by text
 * captured from the template t.
 *
 * _s             string to rewrite
 * t              template string
 * tre            regexp that must match t completely; otherwise _s is
 *                returned unchanged
 * stringSearch   regexp applied to _s; may contain @CAPn@ placeholders
 * stringReplace  replacement for stringSearch; \1, \2 ... refer to captures
 *                of stringSearch, @CAPn@ to captures of tre
 *
 * @CAPn@ stands for the n-th captured text of tre (@CAP0@ is the whole
 * match).
 *
 * Placeholders are substituted as plain text. Text inserted into the search
 * pattern is escaped so that template characters such as '.' or '?' match
 * literally. The previous regexp-based substitution inserted the text
 * unescaped and reinterpreted "\N" sequences contained in it.
 * NOTE(review): "\N" sequences in captured text that ends up in
 * stringReplace are still interpreted by the final replace.
 */
TQString DataBaseInterface::formatRegExp(TQString _s, TQString t, TQString tre,TQString stringSearch,TQString stringReplace)
{
    TQString s=_s;
    TQRegExp templateRegExp(tre);

    if(!templateRegExp.exactMatch(t))
        return s;

    const TQStringList caps=templateRegExp.capturedTexts();
    int i=0;
    for(TQStringList::ConstIterator capit=caps.begin();capit!=caps.end();++capit,++i)
    {
        const TQString placeholder="@CAP"+TQString::number(i)+"@";

        stringReplace.replace(placeholder,*capit);
        stringSearch.replace(placeholder,TQRegExp::escape(*capit));
    }

    s.replace(TQRegExp(stringSearch),stringReplace);

    return s;
}
|
|
|
|
|
|
|
|
/*
 * Returns the entry numbers stored in the alpha index under the simplified
 * form of query.
 *
 * The lookup result is not checked; when nothing is found the list of a
 * default-constructed DBItemMultiIndex is returned.
 *
 * Key and index object are stack objects now; they used to be allocated
 * with new and never freed.
 */
DBItemMultiIndex::IndexList DataBaseInterface::getAlpha( const TQString & query )
{
    DBItemMainKey k(simple(query));
    DBItemMultiIndex d;
    alpha->get(&k,&d);

    return d.getList();
}
|
|
|
|
|
|
|
|
/*
 * Returns the main entry that belongs to entry number i.
 *
 * The number is resolved to its original string through the numeric index
 * and that string is then looked up with get(). No filter is applied.
 */
DataBaseInterface::MainEntry DataBaseInterface::getFromIndex( uint i )
{
    DBItemMainKey key;
    numindex->get(i,&key);

    //FIXME: this is a BUG right now but the filter should be removed
    const TQString original=key.getString();
    return get(original,0);
}
|
|
|
|
|
|
|
|
/*
 * Looks up query in the sentence database and returns the (key, data) pair.
 *
 * The result of the lookup is not checked; when nothing is found the data
 * part is whatever a default-constructed DBItemMainData holds.
 *
 * On every fifth call the application event loop is given a chance to run.
 */
DataBaseInterface::MainEntry DataBaseInterface::getSentence( const TQString & query )
{
    static int counter=1;
    const bool yieldToEventLoop=(++counter%5==0);

    DBItemMainKey key(query);
    DBItemMainData data;
    sentence->get(&key,&data);

    if(yieldToEventLoop)
        kapp->processEvents(100);

    return qMakePair(key,data);
}
|
|
|
|
|
|
|
|
/*
 * Returns the entry numbers stored in the word index under query, or an
 * empty list when the word is not indexed. query is used as given; it is
 * not simplified here.
 */
DBItemMultiIndex::IndexList DataBaseInterface::getWordIndex( const TQString & query )
{
    DBItemMainKey key(query);
    DBItemMultiIndex entries;

    if(wordsindex->get(&key,&entries)==DB_NOTFOUND)
        return TQValueList<unsigned int>();

    return entries.getList();
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
//#include "database.moc.cpp"
|
|
|
|
|