//
// Soundex.cc
//
// Soundex: A fuzzy matching algorithm on the principal of the
// Soundex method for last names used by the U.S. INS
// and described by Knuth and others.
//
// Part of the ht://Dig package
// Copyright (c) 1995-2004 The ht://Dig Group
// For copyright details, see the file COPYING in your distribution
// or the GNU Library General Public License (LGPL) version 2 or later
//
//
// $Id: Soundex.cc,v 1.11 2004/05/28 13:15:20 lha Exp $
//
#ifdef HAVE_CONFIG_H
#include "htconfig.h"
#endif /* HAVE_CONFIG_H */
#include
#include "Soundex.h"
#include "Dictionary.h"
#include
//*****************************************************************************
// Soundex::Soundex(const HtConfiguration& config_arg)
//
Soundex::Soundex(const HtConfiguration& config_arg) :
Fuzzy(config_arg)
{
name = "soundex";
}
//*****************************************************************************
// Soundex::~Soundex()
//
Soundex::~Soundex()
{
}
//*****************************************************************************
// void Soundex::generateKey(char *word, String &key)
//
void
Soundex::generateKey(char *word, String &key)
{
int code = 0;
int lastcode = 0;
key = 0;
if (!word)
{
key = '0';
return;
}
while (*word && !isalpha(*word))
word++;
if (*word)
{
key << *word++;
}
else
{
key = '0';
return;
}
while (key.length() < 6)
{
switch (*word)
{
case 'b':
case 'p':
case 'f':
case 'v':
code = 1;
break;
case 'c':
case 's':
case 'k':
case 'g':
case 'j':
case 'q':
case 'x':
case 'z':
code = 2;
break;
case 'd':
case 't':
code = 3;
break;
case 'l':
code = 4;
break;
case 'm':
case 'n':
code = 5;
break;
case 'r':
code = 6;
break;
case 'a':
case 'e':
case 'i':
case 'o':
case 'u':
case 'y':
case 'w':
case 'h':
code = 0;
break;
default:
break;
}
if (code && code != lastcode)
{
key << code;
lastcode = code;
}
if (*word)
word++;
else
break;
}
}
//*****************************************************************************
// void Soundex::addWord(char *word)
//
void
Soundex::addWord(char *word)
{
if (!dict)
{
dict = new Dictionary;
}
String key;
generateKey(word, key);
String *s = (String *) dict->Find(key);
if (s)
{
// if (mystrcasestr(s->get(), word) != 0)
(*s) << ' ' << word;
}
else
{
dict->Add(key, new String(word));
}
}