You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
1076 lines
30 KiB
1076 lines
30 KiB
3 years ago
|
//
|
||
|
// word.cc
|
||
|
//
|
||
|
// word: Implement tests for the word database related classes.
|
||
|
//
|
||
|
// Part of the ht://Dig package <http://www.htdig.org/>
|
||
|
// Copyright (c) 1999-2004 The ht://Dig Group
|
||
|
// For copyright details, see the file COPYING in your distribution
|
||
|
// or the GNU Library General Public License (LGPL) version 2 or later
|
||
|
// <http://www.gnu.org/copyleft/lgpl.html>
|
||
|
//
|
||
|
// $Id: word.cc,v 1.19 2004/05/28 13:15:30 lha Exp $
|
||
|
//
|
||
|
|
||
|
#ifdef HAVE_CONFIG_H
|
||
|
#include "htconfig.h"
|
||
|
#endif /* HAVE_CONFIG_H */
|
||
|
|
||
|
#include <fcntl.h>
|
||
|
#include <unistd.h>
|
||
|
#include <stdlib.h>
|
||
|
#include <stdio.h>
|
||
|
#include <ctype.h>
|
||
|
|
||
|
// If we have this, we probably want it.
|
||
|
#ifdef HAVE_GETOPT_H
|
||
|
#include <getopt.h>
|
||
|
#endif
|
||
|
|
||
|
#include "WordKey.h"
|
||
|
#include "WordList.h"
|
||
|
#include "WordContext.h"
|
||
|
#include "Configuration.h"
|
||
|
|
||
|
static ConfigDefaults config_defaults[] = {
|
||
|
{ "word_db", "test", 0 },
|
||
|
{ 0 }
|
||
|
};
|
||
|
|
||
|
static Configuration* config = 0;
|
||
|
|
||
|
//
// Test selection gathered from the command line by main() and
// consumed by doword(). A non zero field enables the matching test.
//
typedef struct
{
    int key;       // -k : run dokey()
    int list;      // -l : run dolist()
    int skip;      // -s : run doskip()
    int compress;  // -z : add wordlist_compress=true to the configuration
    int env;       // -e n : run doenv(), n is also the first DocID inserted
} params_t;
|
||
|
|
||
|
static void usage();
|
||
|
static void doword(params_t* params);
|
||
|
static void dolist(params_t* params);
|
||
|
static void dokey(params_t* params);
|
||
|
static void doskip(params_t* params);
|
||
|
static void doenv(params_t* params);
|
||
|
static void pack_show_wordreference(const WordReference& wordRef);
|
||
|
static void pack_show_key(const String& key);
|
||
|
|
||
|
static int verbose = 0;
|
||
|
|
||
|
// *****************************************************************************
|
||
|
// int main(int ac, char **av)
|
||
|
//
|
||
|
|
||
|
int main(int ac, char **av)
|
||
|
{
|
||
|
int c;
|
||
|
params_t params;
|
||
|
|
||
|
params.key = 0;
|
||
|
params.list = 0;
|
||
|
params.skip = 0;
|
||
|
params.env = 0;
|
||
|
params.compress = 0;
|
||
|
|
||
|
while ((c = getopt(ac, av, "ve:klbszw:")) != -1)
|
||
|
{
|
||
|
switch (c)
|
||
|
{
|
||
|
case 'v':
|
||
|
verbose++;
|
||
|
break;
|
||
|
case 'k':
|
||
|
params.key = 1;
|
||
|
break;
|
||
|
case 'l':
|
||
|
params.list = 1;
|
||
|
break;
|
||
|
case 's':
|
||
|
params.skip = 1;
|
||
|
break;
|
||
|
case 'e':
|
||
|
params.env = atoi(optarg);
|
||
|
break;
|
||
|
case 'z':
|
||
|
params.compress = 1;
|
||
|
break;
|
||
|
case '?':
|
||
|
usage();
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
doword(¶ms);
|
||
|
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
//
|
||
|
// mifluz.conf structure
|
||
|
//
|
||
|
// Positions of the numerical fields used with WordKey::Set()/Get()
// by the tests below.
#define WORD_DOCID      1
#define WORD_FLAGS      2
#define WORD_LOCATION   3
|
||
|
|
||
|
//
// Run every test selected in params, in the fixed order
// key, list, skip, env.
//
// The WordKey test runs before any configuration is loaded. When one
// of the WordList based tests (list, skip, env) is requested, the
// global configuration is first built from config_defaults, adjusted
// according to -z, -v and -e, and then installed with
// WordContext::Initialize().
//
static void doword(params_t* params)
{
    const int want_wordlist = params->list || params->skip || params->env;

    if(params->key) {
        if(verbose) fprintf(stderr, "Test WordKey class\n");
        dokey(params);
    }

    if(want_wordlist) {
        config = WordContext::Initialize(config_defaults);

        if(params->compress)
            config->Add("wordlist_compress", "true");

        // Verbosity above 2 is forwarded to the word list, less 2.
        if(verbose > 2) {
            String level;
            level << (verbose - 2);
            config->Add("wordlist_verbose", level);
        }

        if(params->env) {
            config->Add("wordlist_env_share", "true");
            config->Add("wordlist_env_dir", ".");
        }

        WordContext::Initialize(*config);
    }

    if(params->list) {
        if(verbose) fprintf(stderr, "Test WordList class\n");
        dolist(params);
    }

    if(params->skip) {
        if(verbose) fprintf(stderr, "Test WordList::SkipUselessSequentialWalking method\n");
        doskip(params);
    }

    if(params->env) {
        if(verbose) fprintf(stderr, "Test WordList with shared env\n");
        doenv(params);
    }
}
|
||
|
|
||
|
//
// Exercise the WordList class on the database named by the "word_db"
// configuration entry:
//
//   1. insert the words of a fixed sentence and search each of them,
//   2. when verbose, print every word with its number of occurrences,
//   3. count and list the occurrences of 'the',
//   4. delete every occurrence of 'the',
//   5. delete every word of document 5.
//
// Any mismatch prints a diagnostic on stderr and calls exit(1).
// The params_t argument is not used.
//
static void dolist(params_t*)
{
    // String literals are constant, hence const char*.
    static const char* word_list[] = {
        "The",      // DocID = 1
        "quick",    // DocID = 2
        "brown",    // DocID = 3
        "fox",      // DocID = 4
        "jumps",    // DocID = 5
        "over",     // DocID = 6
        "the",      // DocID = 7
        "lazy",     // DocID = 8
        "dog",      // DocID = 9
        0
    };

    //
    // Most simple case. Insert a few words and
    // search them, using exact match.
    //
    {
        // setup a new wordlist
        WordList words(*config);
        if(verbose) WordKeyInfo::Instance()->Show();
        words.Open((*config)["word_db"], O_RDWR);

        // create entries from word_list
        WordReference wordRef;
        wordRef.Key().Set(WORD_FLAGS, 67);
        unsigned int location = 0;
        unsigned int anchor = 0;
        unsigned int docid = 1;
        if(verbose) fprintf(stderr, "Inserting\n");

        for(const char** p = word_list; *p; p++) {
            if(verbose > 4) fprintf(stderr, "inserting word: %s\n", *p);
            wordRef.Key().SetWord(*p);
            wordRef.Key().Set(WORD_DOCID, docid);
            wordRef.Key().Set(WORD_LOCATION, location);
            wordRef.Record().info.data = anchor;
            if(verbose > 1) fprintf(stderr, "%s\n", (char*)wordRef.Get());
            if(verbose > 2) pack_show_wordreference(wordRef);
            words.Insert(wordRef);
            location += strlen(*p);
            anchor++;
            docid++;
        }
        words.Close();

        location = anchor = 0;
        docid = 1;

        if(verbose) fprintf(stderr, "Searching\n");

        // reopen wordlist
        words.Open((*config)["word_db"], O_RDONLY);
        // check if each word (from word_list) is there
        for(const char** p = word_list; *p; p++)
        {
            // recreate wordref from each word
            wordRef.Key().SetWord(*p);
            wordRef.Key().Set(WORD_LOCATION, location);
            wordRef.Record().info.data = anchor;
            wordRef.Key().Set(WORD_DOCID, docid);

            location += strlen(*p);
            anchor++;
            docid++;

            //
            // Skip first word because we don't want to deal with upper/lower case at present.
            //
            if(p == word_list) continue;

            // check if wordref is in wordlist
            if(verbose) fprintf(stderr, "searching for %s ... ", *p);
            if(verbose > 2) pack_show_wordreference(wordRef);
            if(verbose > 1) fprintf(stderr, "%s\n", (char*)wordRef.Get());
            // find matches in wordlist
            List *result = words[wordRef];
            if(!result) {
                fprintf(stderr, "dolist: words[wordRef] returned null pointer\n");
                exit(1);
            }
            result->Start_Get();
            int count = 0;
            WordReference* found;
            // loop through found matches
            while((found = (WordReference*)result->Get_Next()))
            {
                if(wordRef.Key().GetWord() != found->Key().GetWord())
                {
                    fprintf(stderr, "dolist: simple: expected %s, got %s\n", (char*)wordRef.Key().GetWord(), (char*)found->Key().GetWord());
                    exit(1);
                }
                count++;
            }
            if(count != 1) {
                fprintf(stderr, "dolist: simple: searching %s, got %d matches instead of 1\n", (char*)wordRef.Key().GetWord(), count);
                exit(1);
            }
            if(verbose) fprintf(stderr, "done\n");

            delete result;
        }
    }

    //
    // Print all records as sorted within Berkeley DB with number
    // of occurrences.
    //
    if(verbose) {
        WordList words(*config);
        words.Open((*config)["word_db"], O_RDWR);

        List *result = words.Words();
        if(result == 0) {
            fprintf(stderr, "dolist: getting all words failed\n");
            exit(1);
        }
        result->Start_Get();
        int count = 0;
        String* found;
        while((found = (String*)result->Get_Next())) {
            // Initialized so that a failed Noccurrence() call does not
            // make us print an indeterminate value.
            unsigned int noccurrence = 0;
            WordKey key;
            key.SetWord(*found);
            words.Noccurrence(key, noccurrence);
            fprintf(stderr, "%s (%u)\n", (char*)(*found), noccurrence);
            count++;
        }
        if(count != 8) {
            fprintf(stderr, "dolist: getting all words, got %d matches instead of 8\n", count);
            exit(1);
        }

        delete result;
    }

    //
    // Search all occurrences of 'the'
    //
    {
        WordList words(*config);
        words.Open((*config)["word_db"], O_RDWR);

        WordReference wordRef;
        wordRef.Key().SetWord("the");

        unsigned int noccurrence;
        if(words.Noccurrence(wordRef.Key(), noccurrence) != OK) {
            fprintf(stderr, "dolist: get ref count of 'the' failed\n");
            exit(1);
        } else if(noccurrence != 2) {
            fprintf(stderr, "dolist: get ref count of 'the' failed, got %u instead of 2\n", noccurrence);
            exit(1);
        }

        List *result = words[wordRef];
        if(!result) {
            fprintf(stderr, "dolist: words[wordRef] returned null pointer\n");
            exit(1);
        }
        result->Start_Get();
        int count = 0;
        WordReference* found;
        while((found = (WordReference*)result->Get_Next())) {
            if(wordRef.Key().GetWord() != found->Key().GetWord()) {
                fprintf(stderr, "dolist: simple: expected %s, got %s\n", (char*)wordRef.Key().GetWord(), (char*)found->Key().GetWord());
                exit(1);
            }
            if(verbose) fprintf(stderr, "%s\n", (char*)found->Get());
            count++;
        }
        if(count != 2) {
            fprintf(stderr, "dolist: searching occurrences of '%s', got %d matches instead of 2\n", (char*)wordRef.Key().GetWord(), count);
            exit(1);
        }

        delete result;
    }

    //
    // Delete all occurrences of 'the'
    //
    {
        WordList words(*config);
        words.Open((*config)["word_db"], O_RDWR);

        WordReference wordRef("the");
        if(verbose) {
            fprintf(stderr, "**** Delete test:\n");
            words.Write(stderr);
            fprintf(stderr, "**** Delete test:\n");
        }
        int count;
        if((count = words.WalkDelete(wordRef)) != 2) {
            fprintf(stderr, "dolist: delete occurrences of 'the', got %d deletion instead of 2\n", count);
            exit(1);
        }

        List* result = words[wordRef];
        if(!result) {
            fprintf(stderr, "dolist: words[wordRef] returned null pointer\n");
            exit(1);
        }
        if(result->Count() != 0) {
            fprintf(stderr, "dolist: unexpectedly found 'the' \n");
            exit(1);
        }
        delete result;

        // The diagnostics below used to say 'thy' although the word
        // being checked is 'the'.
        unsigned int noccurrence;
        if(words.Noccurrence(wordRef.Key(), noccurrence) != OK) {
            fprintf(stderr, "dolist: get ref count of 'the' failed\n");
            exit(1);
        } else if(noccurrence != 0) {
            fprintf(stderr, "dolist: get ref count of 'the' failed, got %u instead of 0\n", noccurrence);
            exit(1);
        }
    }

    //
    // Delete all words in document 5 (only one word : jumps)
    //
    {
        WordList words(*config);
        words.Open((*config)["word_db"], O_RDWR);

        WordReference wordRef;
        wordRef.Key().Set(WORD_DOCID, 5);
        int count;
        if((count = words.WalkDelete(wordRef)) != 1) {
            fprintf(stderr, "dolist: delete occurrences in DocID 5, %d deletion instead of 1\n", count);
            exit(1);
        }

        wordRef.Clear();
        wordRef.Key().SetWord("jumps");
        List* result = words[wordRef];
        if(!result) {
            fprintf(stderr, "dolist: words[wordRef] returned null pointer\n");
            exit(1);
        }
        if(result->Count() != 0) {
            fprintf(stderr, "dolist: unexpectedly found 'jumps' \n");
            exit(1);
        }
        delete result;

        unsigned int noccurrence;
        if(words.Noccurrence(wordRef.Key(), noccurrence) != OK) {
            fprintf(stderr, "dolist: get ref count of 'jumps' failed\n");
            exit(1);
        } else if(noccurrence != 0) {
            fprintf(stderr, "dolist: get ref count of 'jumps' failed, got %u instead of 0\n", noccurrence);
            exit(1);
        }
    }
}
|
||
|
|
||
|
// Mask with the b lowest bits set, for 0 <= b <= 32. The shift is done
// on an unsigned 1 so that b == 31 does not overflow a signed int.
#define WORD_BIT_MASK(b) ((b) == 32 ? 0xffffffff : (( 1U << (b)) - 1))
|
||
|
|
||
|
//
|
||
|
// See WordKey.h
|
||
|
// Tested: Pack, Unpack, Compare (both forms), accessors, meta information
|
||
|
//
|
||
|
static void
|
||
|
dokey(params_t* params)
|
||
|
{
|
||
|
static char *key_descs[] = {
|
||
|
"Word/DocID 5/Flags 8/Location 19",
|
||
|
"Word/DocID 3/Location 2/Flags 11",
|
||
|
"Word/DocID 3/Flags 8/Location 5",
|
||
|
"Word/DocID 3/Flags 14/Location 7",
|
||
|
"Word/DocID 3/Flags 9/Location 7/Foo1 13/Foo2 16",
|
||
|
0,
|
||
|
};
|
||
|
char** key_desc;
|
||
|
|
||
|
for(key_desc = key_descs; *key_desc; key_desc++) {
|
||
|
WordKeyInfo::InitializeFromString(*key_desc);
|
||
|
|
||
|
if(verbose)
|
||
|
WordKeyInfo::Instance()->Show();
|
||
|
|
||
|
WordKey word;
|
||
|
word.SetWord("aword");
|
||
|
int j;
|
||
|
for(j = WORD_FIRSTFIELD; j < word.NFields(); j++) {
|
||
|
WordKeyNum value = (0xdededede & word.MaxValue(j));
|
||
|
word.Set(j, value);
|
||
|
}
|
||
|
if(verbose > 1) fprintf(stderr, "WORD: %s\n", (char*)word.Get());
|
||
|
|
||
|
String packed;
|
||
|
word.Pack(packed);
|
||
|
|
||
|
WordKey other_word;
|
||
|
other_word.Unpack(packed);
|
||
|
if(verbose > 1) fprintf(stderr, "OTHER_WORD: %s\n", (char*)other_word.Get());
|
||
|
|
||
|
int failed = 0 ;
|
||
|
for(j = WORD_FIRSTFIELD; j < word.NFields(); j++) {
|
||
|
if(word.Get(j) != other_word.Get(j)) {
|
||
|
failed = 1;
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
if(word.GetWord() != other_word.GetWord() ||
|
||
|
!word.IsDefined(0) ||
|
||
|
!other_word.IsDefined(0))
|
||
|
failed = 1;
|
||
|
|
||
|
if(failed) {
|
||
|
fprintf(stderr, "Original and packed/unpacked not equal\n");
|
||
|
WordKeyInfo::Instance()->Show();
|
||
|
fprintf(stderr, "WORD: %s\n", (char*)word.Get());
|
||
|
pack_show_key(packed);
|
||
|
fprintf(stderr, "OTHER_WORD: %s\n", (char*)other_word.Get());
|
||
|
exit(1);
|
||
|
}
|
||
|
|
||
|
//
|
||
|
// Compare in packed form
|
||
|
//
|
||
|
if(!word.PackEqual(other_word))
|
||
|
{
|
||
|
fprintf(stderr, "dokey: %s not equal (object compare)\n", *key_desc);
|
||
|
exit(1);
|
||
|
}
|
||
|
|
||
|
//
|
||
|
// Pack the other_word
|
||
|
//
|
||
|
String other_packed;
|
||
|
|
||
|
other_word.Pack(other_packed);
|
||
|
//
|
||
|
// The two (word and other_word) must compare equal
|
||
|
// using the alternate comparison (fast) interface.
|
||
|
//
|
||
|
if(WordKey::Compare(packed, other_packed) != 0) {
|
||
|
fprintf(stderr, "dokey: %s not equal (fast compare)\n", *key_desc);
|
||
|
exit(1);
|
||
|
}
|
||
|
|
||
|
word.SetWord("Test string");
|
||
|
word.Set(WORD_DOCID,1);
|
||
|
other_word.SetWord("Test string");
|
||
|
word.Pack(packed);
|
||
|
//
|
||
|
// Add one char to the word, they must not compare equal and
|
||
|
// the difference must be minus one.
|
||
|
//
|
||
|
other_word.GetWord().append("a");
|
||
|
other_word.Pack(other_packed);
|
||
|
{
|
||
|
int ret;
|
||
|
if((ret = WordKey::Compare(packed, other_packed)) != -1)
|
||
|
{
|
||
|
fprintf(stderr, "%s\n", (char*)word.Get());
|
||
|
fprintf(stderr, "%s\n", (char*)other_word.Get());
|
||
|
fprintf(stderr, "dokey: %s different length, expected -1 got %d\n", *key_desc, ret);
|
||
|
exit(1);
|
||
|
}
|
||
|
}
|
||
|
other_word.SetWord("Test string");
|
||
|
|
||
|
//
|
||
|
// Change T to S
|
||
|
// the difference must be one.
|
||
|
//
|
||
|
{
|
||
|
String& tmp = other_word.GetWord();
|
||
|
tmp[tmp.indexOf('T')] = 'S';
|
||
|
}
|
||
|
other_word.Pack(other_packed);
|
||
|
{
|
||
|
int ret;
|
||
|
if((ret = WordKey::Compare(packed, other_packed)) != 1)
|
||
|
{
|
||
|
fprintf(stderr, "%s\n", (char*)word.Get());
|
||
|
fprintf(stderr, "%s\n", (char*)other_word.Get());
|
||
|
fprintf(stderr, "dokey: %s different letter (S instead of T), expected 1 got %d\n", *key_desc, ret);
|
||
|
exit(1);
|
||
|
}
|
||
|
}
|
||
|
other_word.SetWord("Test string");
|
||
|
|
||
|
//
|
||
|
// Substract one to the first numeric field
|
||
|
// The difference must be one.
|
||
|
//
|
||
|
other_word.Set(WORD_DOCID,word.Get(WORD_DOCID) - 1);
|
||
|
other_word.Pack(other_packed);
|
||
|
{
|
||
|
int ret;
|
||
|
if((ret = WordKey::Compare(packed, other_packed)) != 1)
|
||
|
{
|
||
|
fprintf(stderr, "%s\n", (char*)word.Get());
|
||
|
fprintf(stderr, "%s\n", (char*)other_word.Get());
|
||
|
fprintf(stderr, "dokey: %s different numeric field, expected 1 got %d\n", *key_desc, ret);
|
||
|
exit(1);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
//
|
||
|
// WordKey::Diff function
|
||
|
//
|
||
|
{
|
||
|
WordKey word("Test1 <DEF> 1 2 3 4 5");
|
||
|
WordKey other_word("Sest1 <DEF> 1 2 3 4 5");
|
||
|
//
|
||
|
// Diff must say that field 0 differ and that word is lower than other_word
|
||
|
//
|
||
|
{
|
||
|
int position = 0;
|
||
|
int lower = 0;
|
||
|
if(!word.Diff(other_word, position, lower)) {
|
||
|
fprintf(stderr, "%s\n", (char*)word.Get());
|
||
|
fprintf(stderr, "%s\n", (char*)other_word.Get());
|
||
|
fprintf(stderr, "dokey: diff failed\n");
|
||
|
exit(1);
|
||
|
}
|
||
|
if(position != 0 || lower != 1) {
|
||
|
fprintf(stderr, "%s\n", (char*)word.Get());
|
||
|
fprintf(stderr, "%s\n", (char*)other_word.Get());
|
||
|
fprintf(stderr, "dokey: diff expected position = 0 and lower = 1 but got position = %d and lower = %d\n", position, lower);
|
||
|
exit(1);
|
||
|
}
|
||
|
}
|
||
|
//
|
||
|
// Only compare prefix
|
||
|
//
|
||
|
other_word.SetWord("Test");
|
||
|
other_word.UndefinedWordSuffix();
|
||
|
other_word.Set(WORD_DOCID, 5);
|
||
|
{
|
||
|
int position = 0;
|
||
|
int lower = 0;
|
||
|
if(!word.Diff(other_word, position, lower)) {
|
||
|
fprintf(stderr, "dokey: diff failed\n");
|
||
|
exit(1);
|
||
|
}
|
||
|
if(position != 1 || lower != 1) {
|
||
|
fprintf(stderr, "%s\n", (char*)word.Get());
|
||
|
fprintf(stderr, "%s\n", (char*)other_word.Get());
|
||
|
fprintf(stderr, "dokey: diff expected position = 1 and lower = 1 but got position = %d and lower = %d\n", position, lower);
|
||
|
exit(1);
|
||
|
}
|
||
|
}
|
||
|
//
|
||
|
// Same key have no diff
|
||
|
//
|
||
|
{
|
||
|
int position = 0;
|
||
|
int lower = 0;
|
||
|
if(word.Diff(word, position, lower)) {
|
||
|
fprintf(stderr, "dokey: diff found when comparing %s with itself\n", (char*)word.Get());
|
||
|
exit(1);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
static void pack_show_key(const String& key)
|
||
|
{
|
||
|
int i;
|
||
|
char c;
|
||
|
|
||
|
for(i=0; i < key.length(); i++) {
|
||
|
c = (isprint(key[i]) ? key[i] : '#');
|
||
|
fprintf(stderr, "%c-%2x ", c, key[i]);
|
||
|
}
|
||
|
fprintf(stderr, "\n");
|
||
|
|
||
|
for(i = 0; i < key.length(); i++) {
|
||
|
int j;
|
||
|
for(j = 0; j < 8; j++)
|
||
|
fprintf(stderr, "%c", (key[j] & (1<<(j))) ? '1' : '0');
|
||
|
}
|
||
|
|
||
|
fprintf(stderr, "\n");
|
||
|
fprintf(stderr, "^0 ^1 ^2 ^3 ^4 ^5 ^6 ^7\n");
|
||
|
fprintf(stderr, "0123456701234567012345670123456701234567012345670123456701234567\n");
|
||
|
}
|
||
|
|
||
|
static void pack_show_wordreference(const WordReference& wordRef)
|
||
|
{
|
||
|
String key;
|
||
|
String record;
|
||
|
|
||
|
wordRef.Pack(key, record);
|
||
|
|
||
|
fprintf(stderr, "key = ");
|
||
|
for(int i = 0; i < key.length(); i++) {
|
||
|
fprintf(stderr, "0x%02x(%c) ", key[i] & 0xff, key[i]);
|
||
|
}
|
||
|
fprintf(stderr, " record = ");
|
||
|
for(int i = 0; i < record.length(); i++) {
|
||
|
fprintf(stderr, "0x%02x(%c) ", record[i] & 0xff, record[i]);
|
||
|
}
|
||
|
fprintf(stderr, "\n");
|
||
|
}
|
||
|
|
||
|
|
||
|
|
||
|
//*****************************************************************************
|
||
|
// void doskip()
|
||
|
// Test SkipUselessSequentialWalking in WordList class
|
||
|
//
|
||
|
static void doskip_normal(params_t*);
|
||
|
static void doskip_harness(params_t*);
|
||
|
static void doskip_overflow(params_t*);
|
||
|
static void doskip_try(WordList& words, WordCursor& search, char* found_string, char* expected_string);
|
||
|
|
||
|
//
// Run the three SkipUselessSequentialWalking tests in order
// (normal, harness, overflow), announcing each of them on stderr
// when verbose.
//
static void doskip(params_t* params)
{
    static const struct {
        const char* banner;
        void (*run)(params_t*);
    } stages[] = {
        { "Test WordList::SkipUselessSequentialWalking normal\n", doskip_normal },
        { "Test WordList::SkipUselessSequentialWalking harness\n", doskip_harness },
        { "Test WordList::SkipUselessSequentialWalking overflow\n", doskip_overflow },
    };
    const int nstages = (int)(sizeof(stages) / sizeof(stages[0]));

    for(int at = 0; at < nstages; at++) {
        if(verbose) fputs(stages[at].banner, stderr);
        stages[at].run(params);
    }
}
|
||
|
|
||
|
//
// Force the key found by the cursor to found_string, call
// SkipUselessSequentialWalking() and check that the found key has
// become exactly expected_string. Prints a diagnostic and exits with
// status 1 if the call returns NOTOK or if the keys differ.
// The words argument is not used.
//
static void doskip_try(WordList& words, WordCursor& search, char* found_string, char* expected_string)
{
    const WordKey& found = search.GetFound().Key();

    // The cursor only exposes its found key as const: cast to overwrite it.
    WordKey& writable = (WordKey&)found;
    writable.Set(found_string);

    const int status = search.SkipUselessSequentialWalking();
    if(status == NOTOK) {
        fprintf(stderr, "doskip_try: SkipUselessSequentialWalking NOTOK searching %s at step %s expecting %s\n", (char*)search.GetSearch().Get(), (char*)found.Get(), (char*)expected_string);
        exit(1);
    }

    WordKey expected(expected_string);
    if(found.ExactEqual(expected))
        return;

    fprintf(stderr, "doskip_try: expected %s but got %s\n", (char*)expected.Get(), (char*)found.Get());
    exit(1);
}
|
||
|
|
||
|
//
|
||
|
// Create artificial WordCursor context
|
||
|
// in which SkipUselessSequentialWalking calls SetToFollowing
|
||
|
// that triggers overflow condition.
|
||
|
//
|
||
|
//
// Build artificial WordCursor states for which
// SkipUselessSequentialWalking has to increment a field that already
// holds its maximum value. Uses a private key description
// (Word/FIELD1 32/FIELD2 8/FIELD3 16) and restores the global
// configuration before returning. The params_t argument is not used.
//
static void doskip_overflow(params_t*)
{
#define WORD_FIELD1     1
#define WORD_FIELD2     2
#define WORD_FIELD3     3

    // These two locals hide the file scope objects of the same name;
    // the global configuration is reached through ::config.
    static ConfigDefaults config_defaults[] = {
        { "wordlist_wordkey_description", "Word/FIELD1 32/FIELD2 8/FIELD3 16", 0 },
        { 0 }
    };
    Configuration config;
    config.Defaults(config_defaults);
    if(verbose > 2) config.Add("wordlist_verbose", "3");
    WordContext::Initialize(config);

    {
        WordList* words = new WordList(config);

        //
        // Looking for zebra at location 3
        //
        WordKey key("zebra <UNDEF> <UNDEF> <UNDEF> 3");
        WordCursor *search = words->Cursor(key);

        {
            //
            // Pretend we found zebra <DEF> 3 <MAX> 7
            // That is a valid candidate for SkipUselessSequentialWalking
            //
            String pretended;
            pretended << "zebra <DEF> 3 " << WordKey::MaxValue(WORD_FIELD2) << " 7";

            //
            // Overflow on FIELD2 must trigger ++ on FIELD1
            //
            String wanted("zebra <DEF> 4 0 3");
            doskip_try(*words, *search, pretended, wanted);
        }

        {
            //
            // Pretend we found zebra <DEF> <MAX> <MAX> 7
            // That is a valid candidate for SkipUselessSequentialWalking
            //
            String pretended;
            pretended << "zebra <DEF> " << WordKey::MaxValue(WORD_FIELD1) << " " << WordKey::MaxValue(WORD_FIELD2) << " 7";

            //
            // Overflow on FIELD2 must trigger append \001 on word Word
            //
            String wanted("zebra\001 <DEF> 0 0 3");
            doskip_try(*words, *search, pretended, wanted);

            //
            // Cannot increment, SkipUselessSequentialWalking returns NOTOK
            //
            ((WordKey&)search->GetSearch()).SetDefinedWordSuffix();
            ((WordReference&)search->GetFound()).Key().Set(pretended);
            const int status = search->SkipUselessSequentialWalking();
            if(status != WORD_WALK_ATEND) {
                fprintf(stderr, "doskip_overflow: SkipUselessSequentialWalking expected NOTOK & WORD_WALK_ATEND searching %s\n", (char*)key.Get());
                exit(1);
            }
        }

        delete search;
        words->Close();
        delete words;
    }

    //
    // Restore default configuration
    //
    WordContext::Initialize(*::config);

#undef WORD_FIELD1
#undef WORD_FIELD2
#undef WORD_FIELD3
}
|
||
|
|
||
|
//
|
||
|
// Create artificial WordCursor contexts
|
||
|
// that covers all possible behaviour of SkipUselessSequentialWalking.
|
||
|
//
|
||
|
//
// Create artificial WordCursor contexts
// that cover all possible behaviours of SkipUselessSequentialWalking.
//
// Uses a private key description made of five 8 bit fields and
// restores the global configuration before returning. Each case is
// checked by doskip_try(), which exits on failure. The params_t
// argument is not used.
//
static void doskip_harness(params_t*)
{
#define WORD_FIELD1     1
#define WORD_FIELD2     2
#define WORD_FIELD3     3
#define WORD_FIELD4     4
#define WORD_FIELD5     5

    // These two locals hide the file scope objects of the same name;
    // the global configuration is reached through ::config.
    static ConfigDefaults config_defaults[] = {
        { "wordlist_wordkey_description", "Word/FIELD1 8/FIELD2 8/FIELD3 8/FIELD4 8/FIELD5 8", 0 },
        { 0 }
    };
    Configuration config;
    config.Defaults(config_defaults);
    if(verbose > 2) config.Add("wordlist_verbose", "3");
    WordContext::Initialize(config);
    {
        WordList* words = new WordList(config);

        //
        // Searching
        //
        //   z <UNDEF> <UNDEF> 5 <UNDEF> 4 <UNDEF>
        //
        // in data set
        //
        //   DATA                          SEE  STATUS   OPERATION
        //   zebra <DEF> 1 5 1 4 3              found    next
        //   zebra <DEF> 1 6 1 4 3         a    nomatch  skip to zebra <DEF> 2 5 0 4 0
        //   zebra <DEF> 1 6 2 4 3              ignore
        //   zebra <DEF> 2 3 1 4 3              ignore
        //   zebra <DEF> <MAX> 6 1 4 3     b    nomatch  skip to zebra\001 <DEF> 0 5 0 4 0
        //   zebra <DEF> <MAX> 7 1 4 3          ignore
        //   zebra <DEF> <MAX> 8 1 4 3          ignore
        //   zebra <DEF> <MAX> 9 1 4 3          ignore
        //   zippo <DEF> 0 3 1 4 3              ignore
        //   zippo <DEF> 8 5 1 1 3         c    nomatch  skip to zippo <DEF> 8 5 1 4 0
        //   zippo <DEF> 8 5 1 2 3              ignore
        //   zippo <DEF> 8 5 1 2 5              ignore
        //   zippo <DEF> 8 5 1 2 9              ignore
        //   zippo <DEF> 8 5 1 3 9              ignore
        //   zorro <DEF> 3 5 <MAX> 6 3     d    nomatch  skip to zorro <DEF> 4 5 0 4 0
        //   zorro <DEF> 3 5 <MAX> 6 5          ignore
        //   zorro <DEF> 3 5 <MAX> 8 5          ignore
        //   zorro <DEF> 4 5 2 4 3              found
        //
        // legend: status is what WalkNextStep function says about the key
        //           nomatch means searchKey.Equal(found.Key()) is false
        //           found means searchKey.Equal(found.Key()) is true
        //           ignore means we jump over it
        //         operation is the next operation decided by WalkNextStep
        //           always skip if SkipUselessSequentialWalking is called.
        //           In general SkipUselessSequentialWalking is not always
        //           called on nomatch. But it is always called if the
        //           search key is not a prefix key, which is our case.
        //         see is a reference to the list below
        //
        // a) Needless to search for keys in which the FIELD1 is equal to 1 since
        //    the FIELD2 is greater than the searched value. Any key with the FIELD1
        //    set to 1 that follow this one will have a FIELD2 greater than the searched
        //    value (5) since the keys are sorted in ascending order.
        //    The next possible key is the one that has FIELD1++.
        //
        // b) Same logic as before but, the FIELD1 has already reached its maximum value
        //    and can't be incremented. zebra will therefore be incremented by appending
        //    a \001 to it. This is only possible since we search for words beginning
        //    with z (z <UNDEF>). We would not do that if searching for (zebra <DEF>).
        //
        // c) The found key does not match the constraint (FIELD4 is lower than the searched
        //    value). We only need to set FIELD4 to the searched value to jump to the
        //    match. No incrementation in this case.
        //
        // d) The FIELD4 is greater than the searched value, making this a lot similar
        //    to the b) case since the FIELD3 value is <MAX>. However FIELD2 matches
        //    the search key, it is therefore useless to increment it. We must ignore
        //    it and increment FIELD1.
        //
        // Looking for zebra with flags 5
        //
        WordKey key("z <UNDEF> <UNDEF> 5 <UNDEF> 4 <UNDEF>");
        WordCursor *search = words->Cursor(key);

#define WORD_NTEST 4

        // doskip_try() takes writable char*, hence the strdup'ed copies.
        char* found_strings[WORD_NTEST];
        char* expected_strings[WORD_NTEST];

        int i = 0;
        char tmp[1024];

        //
        // See a) in comment above
        //
        found_strings[i] = strdup("zebra <DEF> 1 6 1 4 3");
        expected_strings[i] = strdup("zebra <DEF> 2 5 0 4 0");
        i++;

        //
        // See b) in comment above
        // (bounded formatting; the maximum is printed as unsigned)
        //
        snprintf(tmp, sizeof(tmp), "zebra <DEF> %u 6 1 4 3", (unsigned int)WordKey::MaxValue(WORD_FIELD1));
        found_strings[i] = strdup(tmp);
        expected_strings[i] = strdup("zebra\001 <DEF> 0 5 0 4 0");
        i++;

        //
        // See c) in comment above
        //
        found_strings[i] = strdup("zippo <DEF> 8 5 1 1 3");
        expected_strings[i] = strdup("zippo <DEF> 8 5 1 4 0");
        i++;

        //
        // See d) in comment above
        //
        snprintf(tmp, sizeof(tmp), "zorro <DEF> 3 5 %u 6 3", (unsigned int)WordKey::MaxValue(WORD_FIELD3));
        found_strings[i] = strdup(tmp);
        expected_strings[i] = strdup("zorro <DEF> 4 5 0 4 0");
        i++;

        for(i = 0; i < WORD_NTEST; i++) {
            doskip_try(*words, *search, found_strings[i], expected_strings[i]);
            free(found_strings[i]);
            free(expected_strings[i]);
        }

#undef WORD_NTEST

        delete search;
        words->Close();
        delete words;
    }

    //
    // Restore default configuration
    //
    WordContext::Initialize(*::config);

#undef WORD_FIELD1
#undef WORD_FIELD2
#undef WORD_FIELD3
#undef WORD_FIELD4
#undef WORD_FIELD5
}
|
||
|
|
||
|
int
|
||
|
get_int_array(char *s,int **plist,int &n)
|
||
|
{
|
||
|
int i=0;
|
||
|
for(n=0;s[i];n++)
|
||
|
{
|
||
|
for(;s[i] && !isalnum(s[i]);i++);
|
||
|
if(!s[i]){break;}
|
||
|
for(;s[i] && isalnum(s[i]);i++);
|
||
|
}
|
||
|
if(!n){*plist=NULL;return(NOTOK);}
|
||
|
int *list=new int[n];
|
||
|
*plist=list;
|
||
|
int j;
|
||
|
i=0;
|
||
|
for(j=0;s[i];j++)
|
||
|
{
|
||
|
for(;s[i] && !isalnum(s[i]);i++);
|
||
|
if(!s[i]){break;}
|
||
|
list[j]=atoi(s+i);
|
||
|
for(;s[i] && isalnum(s[i]);i++);
|
||
|
}
|
||
|
return(OK);
|
||
|
}
|
||
|
class SkipTestEntry
|
||
|
{
|
||
|
public:
|
||
|
char *searchkey;
|
||
|
char *goodorder;
|
||
|
void GetSearchKey(WordKey &searchKey)
|
||
|
{
|
||
|
searchKey.Set((String)searchkey);
|
||
|
if(verbose) fprintf(stderr, "GetSearchKey: string: %s got: %s\n", (char*)searchkey, (char*)searchKey.Get());
|
||
|
}
|
||
|
int Check(WordList &WList)
|
||
|
{
|
||
|
WordKey empty;
|
||
|
WordReference srchwrd;
|
||
|
GetSearchKey(srchwrd.Key());
|
||
|
Object o;
|
||
|
if(verbose) fprintf(stderr, "checking SkipUselessSequentialWalking on: %s\n", (char*)srchwrd.Get());
|
||
|
if(verbose) fprintf(stderr, "walking all:\n");
|
||
|
List *all = WList.WordRefs();
|
||
|
if(verbose) fprintf(stderr, "walking search: searching for: %s\n", (char*)srchwrd.Get());
|
||
|
|
||
|
WordCursor *search = WList.Cursor(srchwrd.Key(), HTDIG_WORDLIST_COLLECTOR);
|
||
|
search->SetTraces(new List);
|
||
|
search->Walk();
|
||
|
List *wresw = search->GetResults();
|
||
|
List *wres = search->GetTraces();
|
||
|
wresw->Start_Get();
|
||
|
wres->Start_Get();
|
||
|
WordReference *found;
|
||
|
WordReference *correct;
|
||
|
int i;
|
||
|
int ngoodorder;
|
||
|
int *goodorder_a;
|
||
|
get_int_array(goodorder,&goodorder_a,ngoodorder);
|
||
|
for(i=0;(found = (WordReference*)wres->Get_Next());i++)
|
||
|
{
|
||
|
if(i>=ngoodorder) {
|
||
|
fprintf(stderr, "SkipUselessSequentialWalking test failed! i>=ngoodorder\n");
|
||
|
exit(1);
|
||
|
}
|
||
|
if(verbose) fprintf(stderr, "Check actual %d'th walked: %s\n", i, (char*)found->Get());
|
||
|
correct = (WordReference*)all->Nth(goodorder_a[i]);
|
||
|
if(verbose) fprintf(stderr, "Check correct %d : %s\n", goodorder_a[i], (char*)correct->Get());
|
||
|
if(!correct->Key().Equal(found->Key())) {
|
||
|
fprintf(stderr, "SkipUselessSequentialWalking test failed! at position: %d\n", i);
|
||
|
exit(1);
|
||
|
}
|
||
|
}
|
||
|
if(i<ngoodorder) {
|
||
|
fprintf(stderr, "SkipUselessSequentialWalking test failed! n<ngoodorder\n");
|
||
|
exit(1);
|
||
|
}
|
||
|
|
||
|
delete [] goodorder_a;
|
||
|
delete wresw;
|
||
|
delete wres;
|
||
|
delete all;
|
||
|
delete search;
|
||
|
return OK;
|
||
|
}
|
||
|
};
|
||
|
|
||
|
//
// Test cases run by doskip_normal(): { search key, expected order }.
//
SkipTestEntry SkipTestEntries[]=
{
    { "et <DEF> <UNDEF> 0 10 ",   "3 4 5 9 10 12 13 14" },
    { "et <UNDEF> 20 0 <UNDEF> ", "3 4 5 6 7 8 9 14 17" },
};
|
||
|
|
||
|
//
// Open the database named by "word_db" and run every entry of
// SkipTestEntries against it. Exits with status 1 if a check returns
// NOTOK. The params_t argument is not used.
//
static void doskip_normal(params_t*)
{
    const int nentries = (int)(sizeof(SkipTestEntries)/sizeof(SkipTestEntry));

    if(verbose > 0) fprintf(stderr, "doing SkipUselessSequentialWalking test\n");
    // read db into WList from file: skiptest_db.txt
    if(verbose) fprintf(stderr, "WList config:minimum_word_length: %d\n", config->Value("minimum_word_length"));
    WordList WList(*config);
    WList.Open((*config)["word_db"], O_RDWR);

    // now check walk order for a few search terms
    if(verbose) fprintf(stderr, "number of entries: %d\n", nentries);
    for(int entry = 0; entry < nentries; entry++) {
        if(SkipTestEntries[entry].Check(WList) != NOTOK)
            continue;
        fprintf(stderr, "SkipUselessSequentialWalking test failed on SkipTestEntry number: %d\n", entry);
        exit(1);
    }
    WList.Close();
}
|
||
|
|
||
|
//
// Insert the key "the <def> 1 2 3" with DocID set to params->env,
// params->env + 2, ... below 10000, then check that each inserted
// entry exists. Exits with status 1 on the first insertion or lookup
// that does not return OK.
//
static void doenv(params_t* params)
{
    const int first_docid = params->env;
    const int docid_limit = 10000;
    const int docid_step = 2;

    WordReference wordRef;
    WordKey& key = wordRef.Key();
    key.Set("the <def> 1 2 3");

    WordList words(*config);
    words.Open((*config)["word_db"], O_RDWR);

    int docid;

    // Populate
    for(docid = first_docid; docid < docid_limit; docid += docid_step) {
        key.Set(WORD_DOCID, docid);
        if(words.Insert(wordRef) == OK)
            continue;
        fprintf(stderr, "doenv: cannot insert %d\n", docid);
        exit(1);
    }

    // Verify
    for(docid = first_docid; docid < docid_limit; docid += docid_step) {
        key.Set(WORD_DOCID, docid);
        if(words.Exists(wordRef) == OK)
            continue;
        fprintf(stderr, "doenv: cannot find %d\n", docid);
        exit(1);
    }

    words.Close();
}
|
||
|
|
||
|
//*****************************************************************************
|
||
|
// void usage()
|
||
|
// Display program usage information
|
||
|
//
|
||
|
//
// Print the list of command line options on stdout and terminate
// the program with exit status 0.
//
static void usage()
{
    static const char* const help[] = {
        "usage: word [options]\n",
        "Options:\n",
        "\t-v\t\tIncreases the verbosity\n",
        "\t-k\t\tTest WordKey\n",
        "\t-l\t\tTest WordList\n",
        "\t-e n\t\tTest WordList with shared environnement, process number <n>\n",
        "\t-s\t\tTest WordList::SkipUselessSequentialWalking\n",
        "\t-z\t\tActivate compression test (use with -s, -b or -l)\n",
    };
    const int nlines = (int)(sizeof(help) / sizeof(help[0]));

    for(int line = 0; line < nlines; line++)
        fputs(help[line], stdout);
    exit(0);
}
|