extra-dependencies/debian/htdig/htdig-3.2.0b6/test/word.cc

//
// word.cc
//
// word: Implement tests for the word database related classes.
//
// Part of the ht://Dig package   <http://www.htdig.org/>
// Copyright (c) 1999-2004 The ht://Dig Group
// For copyright details, see the file COPYING in your distribution
// or the GNU Library General Public License (LGPL) version 2 or later
// <http://www.gnu.org/copyleft/lgpl.html>
//
// $Id: word.cc,v 1.19 2004/05/28 13:15:30 lha Exp $
//

#ifdef HAVE_CONFIG_H
#include "htconfig.h"
#endif /* HAVE_CONFIG_H */

#include <fcntl.h>
#include <unistd.h>
#include <stdlib.h>
#include <stdio.h>
#include <ctype.h>

// If we have this, we probably want it.
#ifdef HAVE_GETOPT_H
#include <getopt.h>
#endif

#include "WordKey.h"
#include "WordList.h"
#include "WordContext.h"
#include "Configuration.h"

static ConfigDefaults config_defaults[] = {
  { "word_db", "test", 0 },
  { 0 }
};

static Configuration*	config = 0;

typedef struct
{
    int key;
    int list;
    int skip;
    int compress;
    int env;
} params_t;

static void usage();
static void doword(params_t* params);
static void dolist(params_t* params);
static void dokey(params_t* params);
static void doskip(params_t* params);
static void doenv(params_t* params);
static void pack_show_wordreference(const WordReference& wordRef);
static void pack_show_key(const String& key);

static int verbose = 0;

// *****************************************************************************
// int main(int ac, char **av)
//

int main(int ac, char **av)
{
  int			c;
  params_t		params;

  params.key = 0;
  params.list = 0;
  params.skip = 0;
  params.env = 0;
  params.compress = 0;

  while ((c = getopt(ac, av, "ve:klbszw:")) != -1)
    {
      switch (c)
	{
	case 'v':
	  verbose++;
	  break;
	case 'k':
	  params.key = 1;
	  break;
	case 'l':
	  params.list = 1;
	  break;
	case 's':
	  params.skip = 1;
	  break;
	case 'e':
	  params.env = atoi(optarg);
	  break;
	case 'z':
	  params.compress = 1;
	  break;
	case '?':
	  usage();
	  break;
	}
    }

  doword(&params);

  return 0;
}

//
// mifluz.conf structure
//
#define WORD_DOCID	1
#define WORD_FLAGS	2
#define WORD_LOCATION	3

static void doword(params_t* params)
{
  if(params->key) {
    if(verbose) fprintf(stderr, "Test WordKey class\n");
    dokey(params);
  }

  if(params->list || params->skip || params->env) {
    config = WordContext::Initialize(config_defaults);
    if(params->compress) {
	config->Add("wordlist_compress", "true");
    }
    if(verbose > 2) {
      String tmp;
      tmp << (verbose - 2);
      config->Add("wordlist_verbose", tmp);
    }
    if(params->env) {
      config->Add("wordlist_env_share", "true");
      config->Add("wordlist_env_dir", ".");
    }

    WordContext::Initialize(*config);
  }


  if(params->list) {
    if(verbose) fprintf(stderr, "Test WordList class\n");
    dolist(params);
  }

  if(params->skip) {
    if(verbose) fprintf(stderr, "Test WordList::SkipUselessSequentialWalking method\n");
    doskip(params);
  }

  if(params->env) {
    if(verbose) fprintf(stderr, "Test WordList with shared env\n");
    doenv(params);
  }
}

static void dolist(params_t*)
{
  static char* word_list[] = {
    "The",	// DocID = 1
    "quick",	// DocID = 2
    "brown",	// DocID = 3
    "fox",	// DocID = 4
    "jumps",	// DocID = 5
    "over",	// DocID = 6
    "the",	// DocID = 7
    "lazy",	// DocID = 8
    "dog",	// DocID = 9
    0
  };

  //
  // Most simple case. Insert a few words and
  // search them, using exact match.
  //
  {

    // setup a new wordlist
    WordList words(*config);
    if(verbose)WordKeyInfo::Instance()->Show();
    words.Open((*config)["word_db"], O_RDWR);


    // create entries from word_list
    WordReference wordRef;
    wordRef.Key().Set(WORD_FLAGS, 67);
    unsigned int location = 0;
    unsigned int anchor = 0;
    unsigned int docid = 1;
    if(verbose) fprintf(stderr, "Inserting\n");

    for(char** p = word_list; *p; p++) {
      if(verbose > 4) fprintf(stderr, "inserting word: %s\n", *p);
      wordRef.Key().SetWord(*p);
      wordRef.Key().Set(WORD_DOCID, docid);
      wordRef.Key().Set(WORD_LOCATION, location);
      wordRef.Record().info.data = anchor;
      if(verbose > 1) fprintf(stderr, "%s\n", (char*)wordRef.Get());
      if(verbose > 2) pack_show_wordreference(wordRef);
      words.Insert(wordRef);
      location += strlen(*p);
      anchor++;
      docid++;
    }
    words.Close();

    location = anchor = 0;
    docid = 1;

    if(verbose) fprintf(stderr, "Searching\n");

    // reopen wordlist
    words.Open((*config)["word_db"], O_RDONLY);
    //  check if each word (from word_list) is there
    for(char** p = word_list; *p; p++)
    {
      // recreate wordref from each word
      wordRef.Key().SetWord(*p);
      wordRef.Key().Set(WORD_LOCATION, location);
      wordRef.Record().info.data = anchor;
      wordRef.Key().Set(WORD_DOCID, docid);

      location += strlen(*p);
      anchor++;
      docid++;

      //
      // Skip first word because we don't want to deal with upper/lower case at present.
      //
      if(p == word_list) continue;

      // check if wordref is in wordlist
      if(verbose) fprintf(stderr, "searching for %s ... ", *p);
      if(verbose > 2) pack_show_wordreference(wordRef);
      if(verbose > 1) fprintf(stderr, "%s\n", (char*)wordRef.Get());
      // find matches in wordlist
      List *result = words[wordRef];
      if(!result) {
	fprintf(stderr, "dolist: words[wordRef] returned null pointer\n");
	exit(1);
      }
      result->Start_Get();
      int count = 0;
      WordReference* found;
      // loop through found matches
      while((found = (WordReference*)result->Get_Next()))
      {
	if(wordRef.Key().GetWord() != found->Key().GetWord())
	{
	  fprintf(stderr, "dolist: simple: expected %s, got %s\n", (char*)wordRef.Key().GetWord(), (char*)found->Key().GetWord());
	  exit(1);
	}
	count++;
      }
      if(count != 1) {
	fprintf(stderr, "dolist: simple: searching %s, got %d matches instead of 1\n", (char*)wordRef.Key().GetWord(), count);
  	exit(1);
      }
      if(verbose) fprintf(stderr, "done\n");

      delete result;

    }
  }
  //
  // Print all records as sorted within Berkeley DB with number
  // of occurrences.
  //
  if(verbose) {
    WordList words(*config);
    words.Open((*config)["word_db"], O_RDWR);

    List *result = words.Words();
    if(result == 0) {
      fprintf(stderr, "dolist: getting all words failed\n");
      exit(1);
    }
    result->Start_Get();
    int count = 0;
    String* found;
    while((found = (String*)result->Get_Next())) {
      unsigned int noccurrence;
      WordKey key;
      key.SetWord(*found);
      words.Noccurrence(key, noccurrence);
      fprintf(stderr, "%s (%d)\n", (char*)(*found), noccurrence);
      count++;
    }
    if(count != 8) {
      fprintf(stderr, "dolist: getting all words, got %d matches instead of 8\n", count);
      exit(1);
    }

    delete result;
  }
  //
  // Search all occurrences of 'the'
  //
  {
    WordList words(*config);
    words.Open((*config)["word_db"], O_RDWR);

    WordReference wordRef;
    wordRef.Key().SetWord("the");

    unsigned int noccurrence;
    if(words.Noccurrence(wordRef.Key(), noccurrence) != OK) {
      fprintf(stderr, "dolist: get ref count of 'the' failed\n");
      exit(1);
    } else if(noccurrence != 2) {
      fprintf(stderr, "dolist: get ref count of 'the' failed, got %d instead of 2\n", noccurrence);
      exit(1);
    }
    List *result = words[wordRef];
    result->Start_Get();
    int count = 0;
    WordReference* found;
    while((found = (WordReference*)result->Get_Next())) {
	if(wordRef.Key().GetWord() != found->Key().GetWord()) {
	  fprintf(stderr, "dolist: simple: expected %s, got %s\n", (char*)wordRef.Key().GetWord(), (char*)found->Key().GetWord());
	  exit(1);
	}
	if(verbose) fprintf(stderr, "%s\n", (char*)found->Get());
	count++;
    }
    if(count != 2) {
      fprintf(stderr, "dolist: searching occurrences of '%s', got %d matches instead of 2\n", (char*)wordRef.Key().GetWord(), count);
      exit(1);
    }

    delete result;
  }
  //
  // Delete all occurrences of 'the'
  //
  {
    WordList words(*config);
    words.Open((*config)["word_db"], O_RDWR);

    WordReference wordRef("the");
    if(verbose) {
      fprintf(stderr, "**** Delete test:\n");
      words.Write(stderr);
      fprintf(stderr, "**** Delete test:\n");
    }
    int count;
    if((count = words.WalkDelete(wordRef)) != 2) {
      fprintf(stderr, "dolist: delete occurrences of 'the', got %d deletion instead of 2\n", count);
      exit(1);
    }

    List* result = words[wordRef];
    if(result->Count() != 0) {
      fprintf(stderr, "dolist: unexpectedly found 'the' \n");
      exit(1);
    }
    delete result;

    unsigned int noccurrence;
    if(words.Noccurrence(wordRef.Key(), noccurrence) != OK) {
      fprintf(stderr, "dolist: get ref count of 'thy' failed\n");
      exit(1);
    } else if(noccurrence != 0) {
      fprintf(stderr, "dolist: get ref count of 'thy' failed, got %d instead of 0\n", noccurrence);
      exit(1);
    }
  }
  //
  // Delete all words in document 5 (only one word : jumps)
  //
  {
    WordList words(*config);
    words.Open((*config)["word_db"], O_RDWR);

    WordReference wordRef;
    wordRef.Key().Set(WORD_DOCID, 5);
    int count;
    if((count = words.WalkDelete(wordRef)) != 1) {
      fprintf(stderr, "dolist: delete occurrences in DocID 5, %d deletion instead of 1\n", count);
      exit(1);
    }

    wordRef.Clear();
    wordRef.Key().SetWord("jumps");
    List* result = words[wordRef];
    if(result->Count() != 0) {
      fprintf(stderr, "dolist: unexpectedly found 'jumps' \n");
      exit(1);
    }
    delete result;

    unsigned int noccurrence;
    if(words.Noccurrence(wordRef.Key(), noccurrence) != OK) {
      fprintf(stderr, "dolist: get ref count of 'jumps' failed\n");
      exit(1);
    } else if(noccurrence != 0) {
      fprintf(stderr, "dolist: get ref count of 'jumps' failed, got %d instead of 0\n", noccurrence);
      exit(1);
    }
  }
}

#define WORD_BIT_MASK(b) ((b) == 32 ? 0xffffffff : (( 1 << (b)) - 1))

//
// See WordKey.h
// Tested: Pack, Unpack, Compare (both forms), accessors, meta information
//
static void
dokey(params_t* params)
{
  static char *key_descs[] = {
    "Word/DocID 5/Flags 8/Location 19",
    "Word/DocID 3/Location 2/Flags 11",
    "Word/DocID 3/Flags 8/Location 5",
    "Word/DocID 3/Flags 14/Location 7",
    "Word/DocID 3/Flags 9/Location 7/Foo1 13/Foo2 16",
    0,
  };
  char** key_desc;

  for(key_desc = key_descs; *key_desc; key_desc++) {
    WordKeyInfo::InitializeFromString(*key_desc);

    if(verbose)
      WordKeyInfo::Instance()->Show();

    WordKey word;
    word.SetWord("aword");
    int j;
    for(j = WORD_FIRSTFIELD; j < word.NFields(); j++) {
      WordKeyNum value = (0xdededede & word.MaxValue(j));
      word.Set(j, value);
    }
    if(verbose > 1) fprintf(stderr, "WORD: %s\n", (char*)word.Get());

    String packed;
    word.Pack(packed);

    WordKey other_word;
    other_word.Unpack(packed);
    if(verbose > 1) fprintf(stderr, "OTHER_WORD: %s\n", (char*)other_word.Get());

    int failed = 0 ;
    for(j = WORD_FIRSTFIELD; j < word.NFields(); j++) {
      if(word.Get(j) != other_word.Get(j)) {
	failed = 1;
	break;
      }
    }
    if(word.GetWord() != other_word.GetWord() ||
       !word.IsDefined(0) ||
       !other_word.IsDefined(0))
      failed = 1;

    if(failed) {
      fprintf(stderr, "Original and packed/unpacked not equal\n");
      WordKeyInfo::Instance()->Show();
      fprintf(stderr, "WORD: %s\n", (char*)word.Get());
      pack_show_key(packed);
      fprintf(stderr, "OTHER_WORD: %s\n", (char*)other_word.Get());
      exit(1);
    }

    //
    // Compare in packed form
    //
    if(!word.PackEqual(other_word))
      {
	fprintf(stderr, "dokey: %s not equal (object compare)\n", *key_desc);
	exit(1);
      }

    //
    // Pack the other_word
    //
    String other_packed;

    other_word.Pack(other_packed);
    //
    // The two (word and other_word) must compare equal
    // using the alternate comparison (fast) interface.
    //
    if(WordKey::Compare(packed, other_packed) != 0) {
      fprintf(stderr, "dokey: %s not equal (fast compare)\n", *key_desc);
      exit(1);
    }

    word.SetWord("Test string");
    word.Set(WORD_DOCID,1);
    other_word.SetWord("Test string");
    word.Pack(packed);
    //
    // Add one char to the word, they must not compare equal and
    // the difference must be minus one.
    //
    other_word.GetWord().append("a");
    other_word.Pack(other_packed);
    {
      int ret;
      if((ret = WordKey::Compare(packed, other_packed)) != -1)
	{
	  fprintf(stderr, "%s\n", (char*)word.Get());
	  fprintf(stderr, "%s\n", (char*)other_word.Get());
	  fprintf(stderr, "dokey: %s different length, expected -1 got %d\n", *key_desc, ret);
	  exit(1);
	}
    }
    other_word.SetWord("Test string");

    //
    // Change T to S
    // the difference must be one.
    //
    {
      String& tmp = other_word.GetWord();
      tmp[tmp.indexOf('T')] = 'S';
    }
    other_word.Pack(other_packed);
    {
      int ret;
      if((ret = WordKey::Compare(packed, other_packed)) != 1)
	{
	  fprintf(stderr, "%s\n", (char*)word.Get());
	  fprintf(stderr, "%s\n", (char*)other_word.Get());
	  fprintf(stderr, "dokey: %s different letter (S instead of T), expected 1 got %d\n", *key_desc, ret);
	  exit(1);
	}
    }
    other_word.SetWord("Test string");

    //
    // Substract one to the first numeric field
    // The difference must be one.
    //
    other_word.Set(WORD_DOCID,word.Get(WORD_DOCID) - 1);
    other_word.Pack(other_packed);
    {
      int ret;
      if((ret = WordKey::Compare(packed, other_packed)) != 1)
	{
	  fprintf(stderr, "%s\n", (char*)word.Get());
	  fprintf(stderr, "%s\n", (char*)other_word.Get());
	  fprintf(stderr, "dokey: %s different numeric field, expected 1 got %d\n", *key_desc, ret);
	  exit(1);
	}
    }
  }
  //
  // WordKey::Diff function
  //
  {
    WordKey word("Test1 <DEF> 1 2 3 4 5");
    WordKey other_word("Sest1 <DEF> 1 2 3 4 5");
    //
    // Diff must say that field 0 differ and that word is lower than other_word
    //
    {
      int position = 0;
      int lower = 0;
      if(!word.Diff(other_word, position, lower)) {
	fprintf(stderr, "%s\n", (char*)word.Get());
	fprintf(stderr, "%s\n", (char*)other_word.Get());
	fprintf(stderr, "dokey: diff failed\n");
	exit(1);
      }
      if(position != 0 || lower != 1) {
	fprintf(stderr, "%s\n", (char*)word.Get());
	fprintf(stderr, "%s\n", (char*)other_word.Get());
	fprintf(stderr, "dokey: diff expected position = 0 and lower = 1 but got position = %d and lower = %d\n", position, lower);
	exit(1);
      }
    }
    //
    // Only compare prefix
    //
    other_word.SetWord("Test");
    other_word.UndefinedWordSuffix();
    other_word.Set(WORD_DOCID, 5);
    {
      int position = 0;
      int lower = 0;
      if(!word.Diff(other_word, position, lower)) {
	fprintf(stderr, "dokey: diff failed\n");
	exit(1);
      }
      if(position != 1 || lower != 1) {
	fprintf(stderr, "%s\n", (char*)word.Get());
	fprintf(stderr, "%s\n", (char*)other_word.Get());
	fprintf(stderr, "dokey: diff expected position = 1 and lower = 1 but got position = %d and lower = %d\n", position, lower);
	exit(1);
      }
    }
    //
    // Same key have no diff
    //
    {
      int position = 0;
      int lower = 0;
      if(word.Diff(word, position, lower)) {
	fprintf(stderr, "dokey: diff found when comparing %s with itself\n", (char*)word.Get());
	exit(1);
      }
    }
  }
}

static void pack_show_key(const String& key)
{
  int i;
  char c;

  for(i=0; i < key.length(); i++) {
    c = (isprint(key[i]) ? key[i] : '#');
    fprintf(stderr, "%c-%2x ", c, key[i]);
  }
  fprintf(stderr, "\n");

  for(i = 0; i < key.length(); i++) {
    int j;
    for(j = 0; j < 8; j++)
      fprintf(stderr, "%c", (key[j] & (1<<(j))) ? '1' : '0');
  }

  fprintf(stderr, "\n");
  fprintf(stderr, "^0      ^1      ^2      ^3      ^4      ^5      ^6      ^7\n");
  fprintf(stderr, "0123456701234567012345670123456701234567012345670123456701234567\n");
}

static void pack_show_wordreference(const WordReference& wordRef)
{
  String key;
  String record;

  wordRef.Pack(key, record);

  fprintf(stderr, "key = ");
  for(int i = 0; i < key.length(); i++) {
    fprintf(stderr, "0x%02x(%c) ", key[i] & 0xff, key[i]);
  }
  fprintf(stderr, " record = ");
  for(int i = 0; i < record.length(); i++) {
    fprintf(stderr, "0x%02x(%c) ", record[i] & 0xff, record[i]);
  }
  fprintf(stderr, "\n");
}


//*****************************************************************************
// void doskip()
//   Test SkipUselessSequentialWalking in WordList class
//
static void doskip_normal(params_t*);
static void doskip_harness(params_t*);
static void doskip_overflow(params_t*);
static void doskip_try(WordList& words, WordCursor& search, char* found_string, char* expected_string);

static void doskip(params_t* params)
{
  if(verbose) fprintf(stderr, "Test WordList::SkipUselessSequentialWalking normal\n");
  doskip_normal(params);
  if(verbose) fprintf(stderr, "Test WordList::SkipUselessSequentialWalking harness\n");
  doskip_harness(params);
  if(verbose) fprintf(stderr, "Test WordList::SkipUselessSequentialWalking overflow\n");
  doskip_overflow(params);
}

static void doskip_try(WordList& words, WordCursor& search, char* found_string, char* expected_string)
{
  const WordKey& found = search.GetFound().Key();
  ((WordKey&)found).Set(found_string);
  if(search.SkipUselessSequentialWalking() == NOTOK) {
    fprintf(stderr, "doskip_try: SkipUselessSequentialWalking NOTOK searching %s at step %s expecting %s\n", (char*)search.GetSearch().Get(), (char*)found.Get(), (char*)expected_string);
    exit(1);
  }

  WordKey expected(expected_string);
  if(!found.ExactEqual(expected)) {
    fprintf(stderr, "doskip_try: expected %s but got %s\n", (char*)expected.Get(), (char*)found.Get());
    exit(1);
  }
}

//
// Create artificial WordCursor context
// in which SkipUselessSequentialWalking calls SetToFollowing
// that triggers overflow condition.
//
static void doskip_overflow(params_t*)
{
#define WORD_FIELD1	1
#define WORD_FIELD2	2
#define WORD_FIELD3	3

  static ConfigDefaults config_defaults[] = {
    { "wordlist_wordkey_description", "Word/FIELD1 32/FIELD2 8/FIELD3 16", 0 },
    { 0 }
  };
  Configuration config;
  config.Defaults(config_defaults);
  if(verbose > 2) config.Add("wordlist_verbose", "3");
  WordContext::Initialize(config);
  {
    WordList* words = new WordList(config);

    //
    // Looking for zebra at location 3
    //
    WordKey key("zebra <UNDEF> <UNDEF> <UNDEF> 3");
    WordCursor *search = words->Cursor(key);

    {
      //
      // Pretend we found zebra <DEF> 3 <MAX> 7
      // That is a valid candidate for SkipUselessSequentialWalking
      //
      String found;
      found << "zebra <DEF> 3 " << WordKey::MaxValue(WORD_FIELD2) << " 7";

      //
      // Overflow on FIELD2 must trigger ++ on FIELD1
      //
      String expected("zebra	<DEF>	4	0	3");
      doskip_try(*words, *search, found, expected);
    }

    {
      //
      // Prented we found zebra <DEF> <MAX> <MAX> 7
      // That is a valid candidate for SkipUselessSequentialWalking
      //
      String found;
      found << "zebra <DEF> " << WordKey::MaxValue(WORD_FIELD1) << " " << WordKey::MaxValue(WORD_FIELD2) << " 7";

      //
      // Overflow on FIELD2 must trigger append \001 on word Word
      //
      String expected("zebra\001	<DEF>	0	0	3");
      doskip_try(*words, *search, found, expected);

      //
      // Cannot increment, SkipUselessSequentialWalking returns NOTOK
      //
      ((WordKey&)search->GetSearch()).SetDefinedWordSuffix();
      ((WordReference&)search->GetFound()).Key().Set(found);
      if(search->SkipUselessSequentialWalking() != WORD_WALK_ATEND) {
	fprintf(stderr, "doskip_overflow: SkipUselessSequentialWalking expected NOTOK & WORD_WALK_ATEND searching %s\n", (char*)key.Get());
	exit(1);
      }
    
    }

    delete search;
    words->Close();
    delete words;
  }

  //
  // Restore default configuration
  //
  WordContext::Initialize(*::config);

#undef WORD_FIELD1
#undef WORD_FIELD2
#undef WORD_FIELD3
}

//
// Create artificial WordCursor contexts
// that covers all possible behaviour of SkipUselessSequentialWalking.
//
static void doskip_harness(params_t*)
{
#define WORD_FIELD1	1
#define WORD_FIELD2	2
#define WORD_FIELD3	3
#define WORD_FIELD4	4
#define WORD_FIELD5	5

  static ConfigDefaults config_defaults[] = {
    { "wordlist_wordkey_description", "Word/FIELD1 8/FIELD2 8/FIELD3 8/FIELD4 8/FIELD5 8", 0 },
    { 0 }
  };
  Configuration config;
  config.Defaults(config_defaults);
  if(verbose > 2) config.Add("wordlist_verbose", "3");
  WordContext::Initialize(config);
  {
    WordList* words = new WordList(config);

    //
    // Searching
    //
    // z <UNDEF> <UNDEF> 5 <UNDEF> 4 <UNDEF>
    //
    // in data set
    //
    // DATA                              SEE  STATUS    OPERATION                        
    // zebra <DEF> 1     5 1      4 3         found     next                             
    // zebra <DEF> 1     6 1      4 3    a    nomatch   skip to zebra <DEF> 2 5 0 4 0    
    // zebra <DEF> 1     6 2      4 3         ignore                                     
    // zebra <DEF> 2     3 1      4 3         ignore                                     
    // zebra <DEF> <MAX> 6 1      4 3    b    nomatch   skip to zebra\001 <DEF> 0 5 0 4 0
    // zebra <DEF> <MAX> 7 1      4 3         ignore                                     
    // zebra <DEF> <MAX> 8 1      4 3         ignore                                     
    // zebra <DEF> <MAX> 9 1      4 3         ignore                                     
    // zippo <DEF> 0     3 1      4 3         ignore                                     
    // zippo <DEF> 8     5 1      1 3    c    nomatch   skip to zippo <DEF> 8 5 1 4 0    
    // zippo <DEF> 8     5 1      2 3         ignore                                     
    // zippo <DEF> 8     5 1      2 5         ignore                                     
    // zippo <DEF> 8     5 1      2 9         ignore                                     
    // zippo <DEF> 8     5 1      3 9         ignore                                     
    // zorro <DEF> 3     5 <MAX>  6 3    d    nomatch   skip to zorro <DEF> 4 5 0 4 0
    // zorro <DEF> 3     5 <MAX>  6 5         ignore
    // zorro <DEF> 3     5 <MAX>  8 5         ignore
    // zorro <DEF> 4     5 2      4 3         found                                      
    //
    // legend: status is what WalkNextStep function says about the key
    //                nomatch means searchKey.Equal(found.Key()) is false
    //                found means searchKey.Equal(found.Key()) is true
    //                ignore means we jump over it
    //         operation is the next operation decided by WalkNextStep
    //                always skip if SkipUselessSequentialWalking is called.
    //                In general SkipUselessSequentialWalking is not always
    //                called on nomatch. But it is always called if the
    //                search key is not a prefix key, which is our case.
    //         see is a reference to the list bellow
    //
    //  a) Needless to search for keys in which the FIELD1 is equal to 1 since
    //     the FIELD2 is greater than the searched value. Any key with the FIELD1
    //     set to 1 that follow this one will have a FIELD2 greater than the searched
    //     value (5) since the keys are sorted in ascending order. 
    //     The next possible key is the one that has FIELD1++.
    //
    //  b) Same logic as before but, the FIELD1 has already reached its maximum value
    //     and can't be incremented. zebra will therefore be incremented by appending
    //     a \001 to it. This is only possible since we search for words beginning
    //     with z (z <UNDEF>). We would not do that if searching for (zebra <DEF>).
    //
    //  c) The found key does not match the constraint (FIELD4 is lower than the searched
    //     value). We only need to set FIELD4 to the searched value to jump to the
    //     match. No incrementation in this case.
    //
    //  d) The FIELD4 is greater than the searched value, making this a lot similar
    //     to the b) case since the FIELD3 value is <MAX>. However FIELD2 matches
    //     the search key, it is therefore useless to increment it. We must ignore
    //     it and increment FIELD1.
    //
    // Looking for zebra with flags 5
    //
    WordKey key("z <UNDEF> <UNDEF> 5 <UNDEF> 4 <UNDEF>");
    WordCursor *search = words->Cursor(key);

#define WORD_NTEST 4

    static char* found_strings[WORD_NTEST];
    static char* expected_strings[WORD_NTEST];
  
    int i = 0;
    char tmp[1024];
  
    //
    // See a) in comment above
    //
    found_strings[i] = strdup("zebra <DEF> 1     6 1      4 3");
    expected_strings[i] = strdup("zebra <DEF> 2 5 0 4 0");
    i++;

    //
    // See b) in comment above
    //
    sprintf(tmp, "zebra <DEF> %d 6 1      4 3", WordKey::MaxValue(WORD_FIELD1));
    found_strings[i] = strdup(tmp);
    expected_strings[i] = strdup("zebra\001 <DEF> 0 5 0 4 0");
    i++;

    //
    // See c) in comment above
    //
    found_strings[i] = strdup("zippo <DEF> 8     5 1      1 3");
    expected_strings[i] = strdup("zippo <DEF> 8 5 1 4 0");
    i++;

    //
    // See d) in comment above
    //
    sprintf(tmp, "zorro <DEF> 3     5 %d  6 3", WordKey::MaxValue(WORD_FIELD3));
    found_strings[i] = strdup(tmp);
    expected_strings[i] = strdup("zorro <DEF> 4 5 0 4 0");
    i++;

    for(i = 0; i < WORD_NTEST; i++) {
      doskip_try(*words, *search, found_strings[i], expected_strings[i]);
      free(found_strings[i]);
      free(expected_strings[i]);
    }

    delete search;
    words->Close();
    delete words;
  }

  //
  // Restore default configuration
  //
  WordContext::Initialize(*::config);

#undef WORD_FIELD1
#undef WORD_FIELD2
#undef WORD_FIELD3
#undef WORD_FIELD4
#undef WORD_FIELD5
}

int
get_int_array(char *s,int **plist,int &n)
{
    int i=0;
    for(n=0;s[i];n++)
    {
	for(;s[i] && !isalnum(s[i]);i++);
	if(!s[i]){break;}
	for(;s[i] && isalnum(s[i]);i++);
    }
    if(!n){*plist=NULL;return(NOTOK);}
    int *list=new int[n];
    *plist=list;
    int j;
    i=0;
    for(j=0;s[i];j++)
    {
	for(;s[i] && !isalnum(s[i]);i++);
	if(!s[i]){break;}
	list[j]=atoi(s+i);
	for(;s[i] && isalnum(s[i]);i++);
    }
    return(OK);
}
class SkipTestEntry
{
public:
    char *searchkey;
    char *goodorder;
    void GetSearchKey(WordKey &searchKey)
    {
	searchKey.Set((String)searchkey);
	if(verbose) fprintf(stderr, "GetSearchKey: string: %s got: %s\n", (char*)searchkey, (char*)searchKey.Get());
    }
    int Check(WordList &WList)
    {
	WordKey empty;
	WordReference srchwrd;
	GetSearchKey(srchwrd.Key());
	Object o;
	if(verbose) fprintf(stderr, "checking SkipUselessSequentialWalking on: %s\n", (char*)srchwrd.Get());
	if(verbose) fprintf(stderr, "walking all:\n");
	List *all = WList.WordRefs();
	if(verbose) fprintf(stderr, "walking search: searching for: %s\n", (char*)srchwrd.Get());

	WordCursor *search = WList.Cursor(srchwrd.Key(), HTDIG_WORDLIST_COLLECTOR);
	search->SetTraces(new List);
	search->Walk();
	List *wresw = search->GetResults();
	List *wres  = search->GetTraces();
	wresw->Start_Get();
	wres->Start_Get();
	WordReference *found;
	WordReference *correct;
	int i;
	int ngoodorder;
	int *goodorder_a;
	get_int_array(goodorder,&goodorder_a,ngoodorder);
	for(i=0;(found = (WordReference*)wres->Get_Next());i++)
	{
  	    if(i>=ngoodorder) {
	      fprintf(stderr, "SkipUselessSequentialWalking test failed! i>=ngoodorder\n");
	      exit(1);
	    }
	    if(verbose) fprintf(stderr, "Check actual %d'th walked: %s\n", i, (char*)found->Get());
	    correct = (WordReference*)all->Nth(goodorder_a[i]);
	    if(verbose) fprintf(stderr, "Check correct %d : %s\n", goodorder_a[i], (char*)correct->Get());
	    if(!correct->Key().Equal(found->Key())) {
	      fprintf(stderr, "SkipUselessSequentialWalking test failed! at position: %d\n", i);
	      exit(1);
	    }
	}
	if(i<ngoodorder) {
	  fprintf(stderr, "SkipUselessSequentialWalking test failed! n<ngoodorder\n");
	  exit(1);
	}

	delete [] goodorder_a;
	delete wresw;
	delete wres;
	delete all;
	delete search;
	return OK;
    }
};

SkipTestEntry SkipTestEntries[]=
{
     {
 	"et  <DEF>      <UNDEF>       0       10      ",
 	"3 4 5 9 10 12 13 14"
     },
     {
 	"et  <UNDEF>    20            0       <UNDEF> ",
 	"3 4 5 6 7 8 9 14 17",
     },
};

static void doskip_normal(params_t*)
{
  if(verbose > 0) fprintf(stderr, "doing SkipUselessSequentialWalking test\n");
  // read db into WList from file: skiptest_db.txt
  if(verbose) fprintf(stderr, "WList config:minimum_word_length: %d\n", config->Value("minimum_word_length"));
  WordList WList(*config);
  WList.Open((*config)["word_db"], O_RDWR);
  // now check walk order for a few search terms
  int i;
  if(verbose) fprintf(stderr, "number of entries: %d\n", (int)(sizeof(SkipTestEntries)/sizeof(SkipTestEntry)));
  for(i=0;i<(int)(sizeof(SkipTestEntries)/sizeof(SkipTestEntry));i++) {
    if(SkipTestEntries[i].Check(WList) == NOTOK) {
      fprintf(stderr, "SkipUselessSequentialWalking test failed on SkipTestEntry number: %d\n", i);
      exit(1);
    }
  }
  WList.Close();
}

static void doenv(params_t* params)
{
  WordReference wordRef;
  WordKey& key = wordRef.Key();
  key.Set("the <def> 1 2 3");
  WordList words(*config);
  words.Open((*config)["word_db"], O_RDWR);
  int i;
  for(i = params->env; i < 10000; i += 2) {
    key.Set(WORD_DOCID, i);
    if(words.Insert(wordRef) != OK) {
      fprintf(stderr, "doenv: cannot insert %d\n", i);
      exit(1);
    }
  }
  for(i = params->env; i < 10000; i += 2) {
    key.Set(WORD_DOCID, i);
    if(words.Exists(wordRef) != OK) {
      fprintf(stderr, "doenv: cannot find %d\n", i);
      exit(1);
    }
  }
  words.Close();
}

//*****************************************************************************
// void usage()
//   Display program usage information
//
static void usage()
{
    printf("usage: word [options]\n");
    printf("Options:\n");
    printf("\t-v\t\tIncreases the verbosity\n");
    printf("\t-k\t\tTest WordKey\n");
    printf("\t-l\t\tTest WordList\n");
    printf("\t-e n\t\tTest WordList with shared environnement, process number <n>\n");
    printf("\t-s\t\tTest WordList::SkipUselessSequentialWalking\n");
    printf("\t-z\t\tActivate compression test (use with -s, -b or -l)\n");
    exit(0);
}