You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
117 lines
4.2 KiB
117 lines
4.2 KiB
//
|
|
// StringMatch.h
|
|
//
|
|
// StringMatch: This class provides an interface to a fairly specialized string
|
|
// lookup facility. It is intended to be used as a replace for any
|
|
// regular expression matching when the pattern string is in the form:
|
|
//
|
|
// <string1>|<string2>|<string3>|...
|
|
//
|
|
// Just like regular expression routines, the pattern needs to be
|
|
// compiled before it can be used. This is done using the Pattern()
|
|
// member function. Once the pattern has been compiled, the member
|
|
// function Find() can be used to search for the pattern in a string.
|
|
// If a string has been found, the "which" and "length" parameters
|
|
// will be set to the string index and string length respectively.
|
|
// (The string index is counted starting from 0) The return value of
|
|
// Find() is the position at which the string was found or -1 if no
|
|
// strings could be found. If a case insensitive match needs to be
|
|
// performed, call the IgnoreCase() member function before calling
|
|
// Pattern(). This function will setup a character translation table
|
|
// which will convert all uppercase characters to lowercase. If some
|
|
// other translation is required, the TranslationTable() member
|
|
// function can be called to provide a custom table. This table needs
|
|
// to be 256 characters.
|
|
//
|
|
// Part of the ht://Dig package <http://www.htdig.org/>
|
|
// Copyright (c) 1999-2004 The ht://Dig Group
|
|
// For copyright details, see the file COPYING in your distribution
|
|
// or the GNU Library General Public License (LGPL) version 2 or later
|
|
// <http://www.gnu.org/copyleft/lgpl.html>
|
|
//
|
|
// $Id: StringMatch.h,v 1.13 2004/05/28 13:15:21 lha Exp $
|
|
//
|
|
|
|
#ifndef _StringMatch_h_
|
|
#define _StringMatch_h_
|
|
|
|
#include "Object.h"
|
|
#include "HtWordType.h"
|
|
|
|
class StringMatch : public Object
|
|
{
|
|
public:
|
|
//
|
|
// Construction/Destruction
|
|
//
|
|
StringMatch();
|
|
~StringMatch();
|
|
|
|
//
|
|
// Set the pattern to search for. If given as a string needs to
|
|
// be in the form <string1>|<string2>|... If in the form of a
|
|
// List, it should be a list of String objects.
|
|
//
|
|
void Pattern(char *pattern, char sep = '|');
|
|
|
|
//
|
|
// Search for any of the strings in the pattern in the given
|
|
// string The return value is the offset in the source a pattern
|
|
// was found. In this case, the which variable will be set to the
|
|
// index of the pattern string and length will be set to the
|
|
// length of that pattern string. If none of the pattern strings
|
|
// could be found, the return value will be -1
|
|
//
|
|
int FindFirst(const char *string, int &which, int &length);
|
|
int FindFirst(const char *string);
|
|
|
|
int FindFirstWord(const char *string, int &which, int &length);
|
|
int FindFirstWord(const char *string);
|
|
|
|
//
|
|
// If you are interested in matching instead of searching, use
|
|
// the following. Same parameters except that the return value will
|
|
// be 1 if there was a match, 0 if there was not.
|
|
//
|
|
int Compare(const char *string, int &which, int &length);
|
|
int Compare(const char *string);
|
|
|
|
int CompareWord(const char *string, int &which, int &length);
|
|
int CompareWord(const char *string);
|
|
|
|
//
|
|
// Provide a character translation table which will be applied to
|
|
// both the pattern and the input string. This table should be an
|
|
// array of 256 characters. If is the caller's responsibility to
|
|
// manage this table's allocation. The table should remain valid
|
|
// until this object has been destroyed.
|
|
//
|
|
void TranslationTable(char *table);
|
|
|
|
//
|
|
// Build a local translation table which maps all uppercase
|
|
// characters to lowercase
|
|
//
|
|
void IgnoreCase();
|
|
|
|
//
|
|
// Build a local translation table which ignores all given punctuation
|
|
// characters
|
|
//
|
|
void IgnorePunct(char *punct = (char*)NULL);
|
|
|
|
//
|
|
// Determine if there is a pattern associated with this Match object.
|
|
//
|
|
int hasPattern() {return table[0] != 0;}
|
|
|
|
protected:
|
|
int *table[256];
|
|
unsigned char *trans;
|
|
int local_alloc;
|
|
};
|
|
|
|
#endif
|
|
|
|
|