You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
488 lines
15 KiB
488 lines
15 KiB
#include <tqregexp.h>
|
|
#include <tqdir.h>
|
|
#include <assert.h>
|
|
|
|
#include <kapplication.h>
|
|
#include <kdebug.h>
|
|
#include <kstandarddirs.h>
|
|
#include <kprocess.h>
|
|
#include <klocale.h>
|
|
#include <kconfig.h>
|
|
|
|
|
|
#include "progressdialog.h"
|
|
#include "htmlsearch.moc"
|
|
|
|
|
|
/**
 * Creates the search helper. No external process is attached yet;
 * the process pointer starts out as null.
 */
HTMLSearch::HTMLSearch()
  : TQObject(),
    _proc(0)
{
}
|
|
|
|
|
|
/**
 * Returns the save location of the "data" resource for the
 * "khelpcenter/<lang>" sub-directory, as reported by the application's
 * standard-dirs object.
 *
 * @param _lang language code used as the last path component
 */
TQString HTMLSearch::dataPath(const TQString& _lang)
{
  const TQString relative = TQString("khelpcenter/%1").arg(_lang);
  return kapp->dirs()->saveLocation("data", relative);
}
|
|
|
|
|
|
void HTMLSearch::scanDir(const TQString& dir)
|
|
{
|
|
assert( dir.at( dir.length() - 1 ) == '/' );
|
|
|
|
TQStringList::ConstIterator it;
|
|
|
|
if ( KStandardDirs::exists( dir + "index.docbook" ) ) {
|
|
_files.append(dir + "index.docbook");
|
|
progress->setFilesScanned(++_filesScanned);
|
|
} else {
|
|
TQDir d(dir, "*.html", TQDir::Name|TQDir::IgnoreCase, TQDir::Files | TQDir::Readable);
|
|
TQStringList const &list = d.entryList();
|
|
TQString adir = d.canonicalPath () + "/";
|
|
TQString file;
|
|
for (it=list.begin(); it != list.end(); ++it)
|
|
{
|
|
file = adir + *it;
|
|
if ( !_files.contains( file ) ) {
|
|
_files.append(file);
|
|
progress->setFilesScanned(++_filesScanned);
|
|
}
|
|
}
|
|
}
|
|
|
|
TQDir d2(dir, TQString::null, TQDir::Name|TQDir::IgnoreCase, TQDir::Dirs);
|
|
TQStringList const &dlist = d2.entryList();
|
|
for (it=dlist.begin(); it != dlist.end(); ++it)
|
|
if (*it != "." && *it != "..")
|
|
{
|
|
scanDir(dir + *it + "/");
|
|
kapp->processEvents();
|
|
}
|
|
}
|
|
|
|
|
|
bool HTMLSearch::saveFilesList(const TQString& _lang)
|
|
{
|
|
TQStringList dirs;
|
|
|
|
// throw away old files list
|
|
_files.clear();
|
|
|
|
// open config file
|
|
TDEConfig *config = new TDEConfig("khelpcenterrc");
|
|
config->setGroup("Scope");
|
|
|
|
// add KDE help dirs
|
|
if (config->readBoolEntry("KDE", true))
|
|
dirs = kapp->dirs()->findDirs("html", _lang + "/");
|
|
kdDebug() << "got " << dirs.count() << " dirs\n";
|
|
|
|
// TODO: Man and Info!!
|
|
|
|
// add local urls
|
|
TQStringList add = config->readListEntry("Paths");
|
|
TQStringList::Iterator it;
|
|
for (it = add.begin(); it != add.end(); ++it) {
|
|
if ( ( *it ).at( ( *it ).length() - 1 ) != '/' )
|
|
( *it ) += '/';
|
|
dirs.append(*it);
|
|
}
|
|
|
|
_filesScanned = 0;
|
|
|
|
for (it = dirs.begin(); it != dirs.end(); ++it)
|
|
scanDir(*it);
|
|
|
|
delete config;
|
|
|
|
return true;
|
|
}
|
|
|
|
|
|
bool HTMLSearch::createConfig(const TQString& _lang)
|
|
{
|
|
TQString fname = dataPath(_lang) + "/htdig.conf";
|
|
|
|
// locate the common dir
|
|
TQString wrapper = locate("data", TQString("khelpcenter/%1/wrapper.html").arg(_lang));
|
|
if (wrapper.isEmpty())
|
|
wrapper = locate("data", TQString("khelpcenter/en/wrapper.html"));
|
|
if (wrapper.isEmpty())
|
|
return false;
|
|
wrapper = wrapper.left(wrapper.length() - 12);
|
|
|
|
// locate the image dir
|
|
TQString images = locate("data", "khelpcenter/pics/star.png");
|
|
if (images.isEmpty())
|
|
return false;
|
|
images = images.left(images.length() - 8);
|
|
|
|
// This is an example replacement for the default bad_words file
|
|
// distributed with ht://Dig. It was compiled by Marjolein Katsma
|
|
// <HSH@taxon.demon.nl>.
|
|
TQString bad_words = i18n( "List of words to exclude from index",
|
|
"above:about:according:across:actually:\n"
|
|
"adj:after:afterwards:again:against:all:\n"
|
|
"almost:alone:along:already:also:although:\n"
|
|
"always:among:amongst:and:another:any:\n"
|
|
"anyhow:anyone:anything:anywhere:are:aren:\n"
|
|
"arent:around:became:because:become:\n"
|
|
"becomes:becoming:been:before:beforehand:\n"
|
|
"begin:beginning:behind:being:below:beside:\n"
|
|
"besides:between:beyond:billion:both:but:\n"
|
|
"can:cant:cannot:caption:could:couldnt:\n"
|
|
"did:didnt:does:doesnt:dont:down:during:\n"
|
|
"each:eight:eighty:either:else:elsewhere:\n"
|
|
"end:ending:enough:etc:even:ever:every:\n"
|
|
"everyone:everything:everywhere:except:few:\n"
|
|
"fifty:first:five:for:former:formerly:forty:\n"
|
|
"found:four:from:further:had:has:hasnt:have:\n"
|
|
"havent:hence:her:here:hereafter:hereby:\n"
|
|
"herein:heres:hereupon:hers:herself:hes:him:\n"
|
|
"himself:his:how:however:hundred:\n"
|
|
"inc:indeed:instead:into:isnt:its:\n"
|
|
"itself:last:later:latter:latterly:least:\n"
|
|
"less:let:like:likely:ltd:made:make:makes:\n"
|
|
"many:may:maybe:meantime:meanwhile:might:\n"
|
|
"million:miss:more:moreover:most:mostly:\n"
|
|
"mrs:much:must:myself:namely:neither:\n"
|
|
"never:nevertheless:next:nine:ninety:\n"
|
|
"nobody:none:nonetheless:noone:nor:not:\n"
|
|
"nothing:now:nowhere:off:often:once:\n"
|
|
"one:only:onto:others:otherwise:our:ours:\n"
|
|
"ourselves:out:over:overall:own:page:per:\n"
|
|
"perhaps:rather:recent:recently:same:\n"
|
|
"seem:seemed:seeming:seems:seven:seventy:\n"
|
|
"several:she:shes:should:shouldnt:since:six:\n"
|
|
"sixty:some:somehow:someone:something:\n"
|
|
"sometime:sometimes:somewhere:still:stop:\n"
|
|
"such:taking:ten:than:that:the:their:them:\n"
|
|
"themselves:then:thence:there:thereafter:\n"
|
|
"thereby:therefore:therein:thereupon:these:\n"
|
|
"they:thirty:this:those:though:thousand:\n"
|
|
"three:through:throughout:thru:thus:tips:\n"
|
|
"together:too:toward:towards:trillion:\n"
|
|
"twenty:two:under:unless:unlike:unlikely:\n"
|
|
"until:update:updated:updates:upon:\n"
|
|
"used:using:very:via:want:wanted:wants:\n"
|
|
"was:wasnt:way:ways:wed:well:were:\n"
|
|
"werent:what:whats:whatever:when:whence:\n"
|
|
"whenever:where:whereafter:whereas:whereby:\n"
|
|
"wherein:whereupon:wherever:wheres:whether:\n"
|
|
"which:while:whither:who:whoever:whole:\n"
|
|
"whom:whomever:whose:why:will:with:within:\n"
|
|
"without:wont:work:worked:works:working:\n"
|
|
"would:wouldnt:yes:yet:you:youd:youll:your:\n"
|
|
"youre:yours:yourself:yourselves:youve" );
|
|
|
|
TQFile f;
|
|
f.setName( dataPath(_lang) + "/bad_words" );
|
|
if (f.open(IO_WriteOnly))
|
|
{
|
|
TQTextStream ts( &f );
|
|
TQStringList words = TQStringList::split ( TQRegExp ( "[\n:]" ),
|
|
bad_words, false);
|
|
for ( TQStringList::ConstIterator it = words.begin();
|
|
it != words.end(); ++it )
|
|
ts << *it << endl;
|
|
f.close();
|
|
}
|
|
|
|
f.setName(fname);
|
|
if (f.open(IO_WriteOnly))
|
|
{
|
|
kdDebug() << "Writing config for " << _lang << " to " << fname << endl;
|
|
|
|
TQTextStream ts(&f);
|
|
|
|
ts << "database_dir:\t\t" << dataPath(_lang) << endl;
|
|
ts << "start_url:\t\t`" << dataPath(_lang) << "/files`" << endl;
|
|
ts << "local_urls:\t\tfile:/=/" << endl;
|
|
ts << "local_urls_only:\ttrue" << endl;
|
|
ts << "maximum_pages:\t\t1" << endl;
|
|
ts << "image_url_prefix:\t" << images << endl;
|
|
ts << "star_image:\t\t" << images << "star.png" << endl;
|
|
ts << "star_blank:\t\t" << images << "star_blank.png" << endl;
|
|
ts << "compression_level:\t6" << endl;
|
|
ts << "max_hop_count:\t\t0" << endl;
|
|
|
|
ts << "search_results_wrapper:\t" << wrapper << "wrapper.html" << endl;
|
|
ts << "nothing_found_file:\t" << wrapper << "nomatch.html" << endl;
|
|
ts << "syntax_error_file:\t" << wrapper << "syntax.html" << endl;
|
|
ts << "bad_word_list:\t\t" << dataPath(_lang) << "/bad_words" << endl;
|
|
ts << "external_parsers:\t" << "text/xml\t" << locate( "data", "khelpcenter/meinproc_wrapper" ) << endl;
|
|
f.close();
|
|
return true;
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
|
|
#define CHUNK_SIZE 15
|
|
|
|
/**
 * Builds the full-text index for one language.
 *
 * Steps: write the ht://Dig configuration, collect the files to index while
 * showing a progress dialog, run htdig over the files in chunks of
 * CHUNK_SIZE, then run htmerge. Each external program is waited for in a
 * nested event loop that is left from htdigExited() / htmergeExited().
 *
 * @param _lang  language code; "C" is treated as "en"
 * @param parent parent widget for the progress dialog
 * @return true if the whole run succeeded; false if the configuration could
 *         not be written, a program could not be found or started, the chunk
 *         list could not be written, or a program exited abnormally or with a
 *         non-zero status. The progress dialog is destroyed before returning.
 */
bool HTMLSearch::generateIndex(TQString _lang, TQWidget *parent)
{
  if (_lang == "C")
    _lang = "en";

  if (!createConfig(_lang))
    return false;

  // create progress dialog
  progress = new ProgressDialog(parent);
  progress->show();
  kapp->processEvents();

  // create files list ----------------------------------------------
  if (!saveFilesList(_lang))
  {
    delete progress;
    progress = 0;
    return false;
  }

  progress->setState(1);

  // run htdig ------------------------------------------------------
  // The config object lives on the stack and is declared BEFORE the group
  // saver: the saver restores the group in its destructor, so the config
  // must still be alive at that point (locals are destroyed in reverse
  // order of declaration).
  TDEConfig config("khelpcenterrc", true);
  TDEConfigGroupSaver saver(&config, "htdig");
  TQString exe = config.readPathEntry("htdig", kapp->dirs()->findExe("htdig"));

  if (exe.isEmpty())
  {
    delete progress;
    progress = 0;
    return false;
  }

  const TQString dataDir = dataPath(_lang);
  const TQString confFile = dataDir + "/htdig.conf";

  bool initial = true;
  bool done = false;
  int count = 0;

  _filesToDig = _files.count();
  progress->setFilesToDig(_filesToDig);
  _filesDigged = 0;

  TQDir d;
  d.mkdir(dataDir);

  while (!done)
  {
    // kill old process
    delete _proc;

    // prepare new process
    _proc = new TDEProcess();
    *_proc << exe << "-v" << "-c" << confFile;
    if (initial)
    {
      // only the first chunk is run with "-i"
      *_proc << "-i";
      initial = false;
    }

    kdDebug() << "Running htdig" << endl;

    connect(_proc, TQT_SIGNAL(receivedStdout(TDEProcess *,char*,int)),
            this, TQT_SLOT(htdigStdout(TDEProcess *,char*,int)));

    connect(_proc, TQT_SIGNAL(processExited(TDEProcess *)),
            this, TQT_SLOT(htdigExited(TDEProcess *)));

    // write out file: the next chunk of URLs for htdig to read
    TQFile f(dataDir + "/files");
    if (!f.open(IO_WriteOnly))
    {
      kdDebug() << "Could not open `files` for writing" << endl;
      delete _proc;
      _proc = 0;
      delete progress;
      progress = 0;
      return false;
    }

    TQTextStream ts(&f);
    for (int i = 0; i < CHUNK_SIZE; ++i, ++count)
    {
      if (count >= _filesToDig)
      {
        // ran out of files inside this chunk: this is the last htdig run
        done = true;
        break;
      }
      ts << "file://" + _files[count] << endl;
    }
    f.close();

    // execute htdig
    _htdigRunning = true;
    if (!_proc->start(TDEProcess::NotifyOnExit, TDEProcess::Stdout))
    {
      // Without a running process processExited() is never emitted, so
      // entering the nested event loop would never return.
      kdDebug() << "Could not start htdig" << endl;
      _htdigRunning = false;
      delete _proc;
      _proc = 0;
      delete progress;
      progress = 0;
      return false;
    }

    kapp->enter_loop();

    if (!_proc->normalExit() || _proc->exitStatus() != 0)
    {
      // reset the pointers so that a later call does not delete them again
      delete _proc;
      _proc = 0;
      delete progress;
      progress = 0;
      return false;
    }

    // _filesDigged += CHUNK_SIZE;
    progress->setFilesDigged(_filesDigged);
    kapp->processEvents();
  }

  progress->setState(2);

  // run htmerge -----------------------------------------------------
  exe = config.readPathEntry("htmerge", kapp->dirs()->findExe("htmerge"));
  if (exe.isEmpty())
  {
    delete _proc;
    _proc = 0;
    delete progress;
    progress = 0;
    return false;
  }

  delete _proc;
  _proc = new TDEProcess();
  *_proc << exe << "-c" << confFile;

  kdDebug() << "Running htmerge" << endl;

  connect(_proc, TQT_SIGNAL(processExited(TDEProcess *)),
          this, TQT_SLOT(htmergeExited(TDEProcess *)));

  _htmergeRunning = true;
  if (!_proc->start(TDEProcess::NotifyOnExit, TDEProcess::Stdout))
  {
    kdDebug() << "Could not start htmerge" << endl;
    _htmergeRunning = false;
    delete _proc;
    _proc = 0;
    delete progress;
    progress = 0;
    return false;
  }

  kapp->enter_loop();

  if (!_proc->normalExit() || _proc->exitStatus() != 0)
  {
    delete _proc;
    _proc = 0;
    delete progress;
    progress = 0;
    return false;
  }

  delete _proc;
  _proc = 0;

  progress->setState(3);
  kapp->processEvents();

  delete progress;
  progress = 0;

  return true;
}
|
|
|
|
|
|
|
|
/**
 * Slot: receives a chunk of htdig's standard output and updates the count
 * of processed files shown in the progress dialog.
 *
 * Every "file://" occurrence in the chunk adds one to the count and every
 * "not changed" occurrence takes one away again.
 *
 * @param buffer raw output bytes; only the first @p len bytes are valid
 * @param len    number of valid bytes in @p buffer
 */
void HTMLSearch::htdigStdout(TDEProcess *, char *buffer, int len)
{
  // The buffer is not guaranteed to be NUL-terminated, so convert exactly
  // len bytes instead of scanning for a terminator.
  const TQString line = TQString::fromLocal8Bit(buffer, len);

  // find() returns -1 when there is no further match; a match at offset 0
  // is a valid hit and must be counted as well.
  int cnt = 0;
  int index = -1;
  while ( (index = line.find("file://", index+1)) >= 0)
    cnt++;
  _filesDigged += cnt;

  cnt = 0;
  index = -1;
  while ( (index = line.find("not changed", index+1)) >= 0)
    cnt++;
  _filesDigged -= cnt;

  progress->setFilesDigged(_filesDigged);
}
|
|
|
|
|
|
/**
 * Slot: htdig has terminated. Logs its exit status, clears the running flag
 * and leaves the nested event loop that generateIndex() is waiting in.
 *
 * @param p the process that exited
 */
void HTMLSearch::htdigExited(TDEProcess *p)
{
  const int status = p->exitStatus();
  kdDebug() << "htdig terminated " << status << endl;

  _htdigRunning = false;
  kapp->exit_loop();
}
|
|
|
|
|
|
/**
 * Slot: htmerge has terminated. Clears the running flag and leaves the
 * nested event loop that generateIndex() is waiting in.
 */
void HTMLSearch::htmergeExited(TDEProcess * /*process*/)
{
  kdDebug() << "htmerge terminated" << endl;

  _htmergeRunning = false;
  kapp->exit_loop();
}
|
|
|
|
|
|
/**
 * Slot: appends a chunk of htsearch's standard output, decoded from the
 * local 8-bit encoding, to the accumulated search result.
 *
 * @param buffer raw output bytes
 * @param len    number of bytes in @p buffer
 */
void HTMLSearch::htsearchStdout(TDEProcess * /*process*/, char *buffer, int len)
{
  const TQString chunk = TQString::fromLocal8Bit(buffer, len);
  _searchResult.append(chunk);
}
|
|
|
|
|
|
/**
 * Slot: htsearch has terminated. Clears the running flag and leaves the
 * nested event loop that search() is waiting in.
 */
void HTMLSearch::htsearchExited(TDEProcess * /*process*/)
{
  kdDebug() << "htsearch terminated" << endl;

  _htsearchRunning = false;
  kapp->exit_loop();
}
|
|
|
|
|
|
/**
 * Runs htsearch for the given query and stores its output as an HTML file.
 *
 * The output is post-processed ("http://localhost/" becomes "file:/", the
 * "Content-type: text/html" header is removed) and written to
 * "result.html" in the data directory of the language.
 *
 * @param _lang   language code; "C" is treated as "en"
 * @param words   search words
 * @param method  value for htsearch's "method" parameter
 * @param matches value for htsearch's "matchesperpage" parameter
 * @param format  value for htsearch's "format" parameter
 * @param sort    value for htsearch's "sort" parameter
 * @return path of the written result file, or a null string if htsearch
 *         could not be found or started, exited abnormally or with a
 *         non-zero status, or the result file could not be written
 */
TQString HTMLSearch::search(TQString _lang, TQString words, TQString method, int matches,
                            TQString format, TQString sort)
{
  if (_lang == "C")
    _lang = "en";

  // Best effort, as before: the search is still attempted when the
  // configuration cannot be rewritten, but the failure is no longer silent.
  if (!createConfig(_lang))
    kdDebug() << "Could not write htdig configuration for " << _lang << endl;

  const TQString dataDir = dataPath(_lang);
  TQString result = dataDir + "/result.html";

  // run htsearch ----------------------------------------------------
  // The config object lives on the stack and is declared BEFORE the group
  // saver: the saver restores the group in its destructor, so the config
  // must still be alive at that point.
  TDEConfig config("khelpcenterrc", true);
  TDEConfigGroupSaver saver(&config, "htdig");
  TQString exe = config.readPathEntry("htsearch", kapp->dirs()->findExe("htsearch"));
  if (exe.isEmpty())
    return TQString::null;

  // Build the query by concatenation. Chaining arg() calls would also
  // substitute "%1".."%5" sequences that occur inside the user's words.
  // NOTE(review): the values are passed to htsearch unescaped, so a ';' in
  // `words` can introduce additional parameters -- confirm whether the
  // caller encodes them.
  const TQString query = TQString("words=") + words
                       + ";method=" + method
                       + ";matchesperpage=" + TQString::number(matches)
                       + ";format=" + format
                       + ";sort=" + sort;

  // NOTE(review): a previous _proc is overwritten without being deleted, as
  // before; deleting it here is only safe once every code path resets the
  // pointer after deleting the process.
  _proc = new TDEProcess();
  *_proc << exe << "-c" << dataDir + "/htdig.conf" << query;

  kdDebug() << "Running htsearch" << endl;

  connect(_proc, TQT_SIGNAL(receivedStdout(TDEProcess *,char*,int)),
          this, TQT_SLOT(htsearchStdout(TDEProcess *,char*,int)));
  connect(_proc, TQT_SIGNAL(processExited(TDEProcess *)),
          this, TQT_SLOT(htsearchExited(TDEProcess *)));

  _htsearchRunning = true;
  _searchResult = "";

  if (!_proc->start(TDEProcess::NotifyOnExit, TDEProcess::Stdout))
  {
    // Without a running process processExited() is never emitted, so
    // entering the nested event loop would never return.
    kdDebug() << "Could not start htsearch" << endl;
    _htsearchRunning = false;
    delete _proc;
    _proc = 0;
    return TQString::null;
  }

  kapp->enter_loop();

  if (!_proc->normalExit() || _proc->exitStatus() != 0)
  {
    kdDebug() << "Error running htsearch... returning now" << endl;
    delete _proc;
    _proc = 0;
    return TQString::null;
  }

  // reset the pointer so that a later delete does not free it twice
  delete _proc;
  _proc = 0;

  // modify the search result
  _searchResult = _searchResult.replace("http://localhost/", "file:/");
  _searchResult = _searchResult.replace("Content-type: text/html", TQString::null);

  // dump the search result
  TQFile f(result);
  if (!f.open(IO_WriteOnly))
    return TQString::null;

  TQTextStream ts(&f);
  ts << _searchResult << endl;
  f.close();

  return result;
}
|