You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
180 lines
3.5 KiB
180 lines
3.5 KiB
3 years ago
|
//
|
||
|
// txt2mifluz.cc
|
||
|
//
|
||
|
// txt2mifluz: stress test the Berkeley DB database and WordList interface.
|
||
|
//
|
||
|
// Part of the ht://Dig package <http://www.htdig.org/>
|
||
|
// Copyright (c) 1999-2004 The ht://Dig Group
|
||
|
// For copyright details, see the file COPYING in your distribution
|
||
|
// or the GNU Library General Public License (LGPL) version 2 or later
|
||
|
// <http://www.gnu.org/copyleft/lgpl.html>
|
||
|
//
|
||
|
// $Id: txt2mifluz.cc,v 1.4 2004/05/28 13:15:30 lha Exp $
|
||
|
//
|
||
|
|
||
|
#ifdef HAVE_CONFIG_H
|
||
|
#include <htconfig.h>
|
||
|
#endif /* HAVE_CONFIG_H */
|
||
|
|
||
|
#ifdef HAVE_UNISTD_H
|
||
|
#include <unistd.h>
|
||
|
#endif /* HAVE_UNISTD_H */
|
||
|
|
||
|
// If we have this, we probably want it.
|
||
|
#ifdef HAVE_GETOPT_H
|
||
|
#include <getopt.h>
|
||
|
#endif /* HAVE_GETOPT_H */
|
||
|
#ifdef HAVE_MALLOC_H
|
||
|
#include <malloc.h>
|
||
|
#endif /* HAVE_MALLOC_H */
|
||
|
#include <stdlib.h>
|
||
|
|
||
|
#include <htString.h>
|
||
|
#include <WordList.h>
|
||
|
#include <WordContext.h>
|
||
|
|
||
|
/*
|
||
|
* Store all options from the command line
|
||
|
*/
|
||
|
class params_t
|
||
|
{
|
||
|
public:
|
||
|
char* dbfile;
|
||
|
int compress;
|
||
|
};
|
||
|
|
||
|
/*
|
||
|
* Explain options
|
||
|
*/
|
||
|
static void usage();
|
||
|
/*
|
||
|
* Verbosity level set with -v (++)
|
||
|
*/
|
||
|
static int verbose = 0;
|
||
|
|
||
|
class WordSearch {
|
||
|
public:
|
||
|
WordSearch();
|
||
|
|
||
|
WordKey *Search(const String& expr);
|
||
|
WordKey *Search(const StringList& terms);
|
||
|
WordKey *Search(WordKey* keys);
|
||
|
|
||
|
WordKey* Terms2WordKey(const StringList& terms);
|
||
|
|
||
|
void DocumentSet(const WordKey& from, WordKey& to);
|
||
|
void DocumentCompare(const WordKey& a, const WordKey& b);
|
||
|
|
||
|
int limit_bottom;
|
||
|
int limit_count;
|
||
|
WordList* words;
|
||
|
};
|
||
|
|
||
|
WordSearch::WordSearch()
|
||
|
{
|
||
|
limit_bottom = 0;
|
||
|
limit_count = 0;
|
||
|
words = 0;
|
||
|
}
|
||
|
|
||
|
WordKey *WordSearch::Search(const String& expr)
|
||
|
{
|
||
|
return Search(StringList(expr, " \t"));
|
||
|
}
|
||
|
|
||
|
WordKey *WordSearch::Search(const StringList& terms)
|
||
|
{
|
||
|
return Search(Terms2WordKey(terms));
|
||
|
}
|
||
|
|
||
|
WordKey *WordSearch::Search(WordKey* keys)
|
||
|
{
|
||
|
// WordKey* AscendingFrequency(ter);
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
WordKey* WordSearch::Terms2WordKey(const StringList& terms)
|
||
|
{
|
||
|
WordKey* keys = new WordKey[terms.Count() + 1];
|
||
|
|
||
|
int i;
|
||
|
String* term;
|
||
|
ListCursor cursor;
|
||
|
terms.Start_Get(cursor);
|
||
|
for(i = 0; (term = (String*)terms.Get_Next(cursor)); i++) {
|
||
|
keys[i].SetWord(*term);
|
||
|
}
|
||
|
|
||
|
for(i = 0; !keys[i].Empty(); i++) {
|
||
|
fprintf(stderr, "%s\n", (char*)keys[i].Get());
|
||
|
}
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
|
||
|
// *****************************************************************************
|
||
|
// Entry point
|
||
|
//
|
||
|
int main(int ac, char **av)
|
||
|
{
|
||
|
int c;
|
||
|
extern char *optarg;
|
||
|
params_t params;
|
||
|
|
||
|
params.compress = 0;
|
||
|
params.dbfile = strdup("test");
|
||
|
|
||
|
while ((c = getopt(ac, av, "vB:f:z")) != -1)
|
||
|
{
|
||
|
switch (c)
|
||
|
{
|
||
|
case 'v':
|
||
|
verbose++;
|
||
|
break;
|
||
|
case 'B':
|
||
|
free(params.dbfile);
|
||
|
params.dbfile = strdup(optarg);
|
||
|
break;
|
||
|
case 'z':
|
||
|
params.compress = 1;
|
||
|
break;
|
||
|
case '?':
|
||
|
usage();
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
Configuration* config = WordContext::Initialize();
|
||
|
if(!config) {
|
||
|
fprintf(stderr, "txt2mifluz: no config file found\n");
|
||
|
exit(1);
|
||
|
}
|
||
|
|
||
|
if(params.compress) {
|
||
|
config->Add("wordlist_compress", "true");
|
||
|
}
|
||
|
|
||
|
WordList words(*config);
|
||
|
words.Open(params.dbfile, O_RDWR|O_TRUNC);
|
||
|
int inserted = words.Read(stdin);
|
||
|
if(verbose)
|
||
|
printf("inserted %d WordReferences\n", inserted);
|
||
|
words.Close();
|
||
|
|
||
|
delete config;
|
||
|
free(params.dbfile);
|
||
|
}
|
||
|
|
||
|
// *****************************************************************************
|
||
|
// void usage()
|
||
|
// Display program usage information
|
||
|
//
|
||
|
static void usage()
|
||
|
{
|
||
|
printf("usage: txt2mifluz [options] < txtfile\n");
|
||
|
printf("Options:\n");
|
||
|
printf("\t-v\t\tIncreases the verbosity\n");
|
||
|
printf("\t-B dbfile\tuse <dbfile> as a db file name (default test).\n");
|
||
|
exit(0);
|
||
|
}
|