You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

185 lines
4.8 KiB

//
// SplitMatches.cc
//
// SplitMatches:
// Holds a list of lists with the matches, as specified in
// search_results_order.
//
// Part of the ht://Dig package <http://www.htdig.org/>
// Copyright (c) 2000-2004 The ht://Dig Group
// For copyright details, see the file COPYING in your distribution
// or the GNU Library General Public License (LGPL) version 2 or later
// <http://www.gnu.org/copyleft/lgpl.html>
//
// $Id: SplitMatches.cc,v 1.6 2004/05/28 13:15:24 lha Exp $
#ifdef HAVE_CONFIG_H
#include "htconfig.h"
#endif /* HAVE_CONFIG_H */
#include "StringList.h"
#include "HtRegex.h"
#include "SplitMatches.h"
#include <stdio.h>
#include <ctype.h>
// This class is only used in private members of SplitMatches.
// The OO-right thing would be to nest this inside the private
// declaration of SplitMatches, but that would cause portability
// problems according to
// <URL:http://www.mozilla.org/hacking/portable-cpp.html#inner_classes>.
//
// It is used as a container for a key (String) and a list.
//
class MatchArea : public Object
{
public:
    // Build a sub-area from one search_results_order entry: either a
    // '|'-separated list of URL patterns, or the literal "*" that marks
    // the catch-the-rest area.  The constructor is explicit so that a
    // String can never be converted into a MatchArea by accident.
    explicit MatchArea(const String &);
    ~MatchArea();

    // Report whether the URL in s belongs to this area.
    // A catch-the-rest ("*") area never installs a pattern and is
    // expected to always answer false here; the two zero arguments are
    // presumably what makes HtRegex report "no match" for an unset
    // pattern -- confirm against HtRegex::match.
    inline bool Match(char *s)
    { return match.match(s, 0, 0) != 0; }

    // Give access to the list owned by this area.  The pointer refers
    // to a data member, so it is only valid while this object lives.
    List *MatchList() { return &myList; }

private:
    HtRegex match;  // URL pattern(s) for this area; left unset for "*".
    List myList;    // Entries that were sorted into this area.

    // Declared but intentionally never defined: keeping these private
    // stops the compiler from generating them and turns any accidental
    // default-construction, copy or assignment into a build error.
    MatchArea();
    MatchArea(const MatchArea &);
    void operator= (const MatchArea &);
};
// Set up the matcher for one sub-area.
//   url_regex: a '|'-separated list of URL patterns, or the literal "*".
MatchArea::MatchArea(const String &url_regex)
{
    // The catch-the-rest marker is never installed as a real pattern:
    // leaving the matcher untouched is what makes Match() answer false
    // for this area.
    const bool is_catch_rest = (strcmp("*", url_regex.get()) == 0);
    if (is_catch_rest)
        return;

    // Break the entry into its alternatives and hand them to the
    // matcher.
    StringList alternatives(url_regex.get(), '|');
    match.setEscaped(alternatives);
}
// Nothing to release explicitly: the pattern matcher and the list are
// plain data members and are destroyed automatically with the object.
// NOTE(review): whether destroying myList also deletes the entries it
// still holds depends on List's destructor -- confirm in List.
MatchArea::~MatchArea()
{
}
// Build the list of sub-areas from the "search_results_order"
// configuration attribute.
//   config: configuration to read the attribute from.
// The attribute value is split on whitespace; every token becomes one
// MatchArea, kept in the order given.  A "*" token marks the
// catch-the-rest area.  If "*" occurs more than once, the last
// occurrence becomes the default list; if it does not occur at all, a
// catch-the-rest area is appended at the end.
SplitMatches::SplitMatches(Configuration &config)
{
    // A string literal is const data; binding it to a plain "char *"
    // is deprecated in C++98 and ill-formed from C++11 on.
    const char *config_item = "search_results_order";

    StringList sl(config[config_item], "\t \r\n");

    mySubAreas = new List();
    myDefaultList = 0;

    // Parse each as in TemplateList::createFromString.
    for (int i = 0; i < sl.Count(); i++)
    {
        String sub_area_pattern = sl[i];
        MatchArea *match_item = new MatchArea(sub_area_pattern);
        mySubAreas->Add(match_item);

        // If this is the magic catch-rest sub-area-pattern, we want to
        // use its list-pointer to store all URLs that do not match
        // anything else.
        // We will iterate over a list where one of the patterns is
        // known to not match, but that's a small penalty for keeping
        // the code simple.
        if (strcmp("*", sub_area_pattern.get()) == 0)
            myDefaultList = match_item->MatchList();
    }

    // If we did not have a catch-the-rest pattern, install one at the
    // end of the list.
    if (myDefaultList == 0)
    {
        MatchArea *match_item = new MatchArea(String("*"));
        mySubAreas->Add(match_item);
        myDefaultList = match_item->MatchList();
    }
}
// Release the list of sub-areas allocated by the constructor.
SplitMatches::~SplitMatches()
{
// myDefaultList is a pointer to one of the items in mySubAreas and
// must not be explicitly deleted here.
// NOTE(review): this relies on List's destructor also disposing of the
// MatchArea objects stored in it -- confirm in List.
delete mySubAreas;
}
// File a result under the first sub-area whose pattern matches its URL.
//   match: the result to store.
//   url:   the URL used to pick the sub-area.
// Results that match no sub-area end up in the catch-the-rest list.
// Note that this restarts the iteration cursor of the sub-area list.
void
SplitMatches::Add(ResultMatch *match, char *url)
{
    // A plain linear scan.  Tens of areas already seems a lot, and a
    // smarter lookup would probably only pay off with hundreds.
    mySubAreas->Start_Get();
    for (;;)
    {
        MatchArea *candidate = (MatchArea *) mySubAreas->Get_Next();
        if (!candidate)
            break;

        if (!candidate->Match(url))
            continue;

        // Only the first matching area receives the result.
        List *bucket = candidate->MatchList();
        bucket->Add(match);
        return;
    }

    // Nothing matched: fall back to the catch-the-rest list.
    myDefaultList->Add(match);
}
// Iterator step: advance the sub-area cursor and return the list that
// belongs to the sub-area reached, or 0 when there is none left.
List *
SplitMatches::Get_Next()
{
    MatchArea *area = (MatchArea *) mySubAreas->Get_Next();
    return area ? area->MatchList() : 0;
}
// Collect the contents of every sub-area list, in sub-area order, into
// one freshly allocated list and return it.  Nothing here frees the
// returned list, so that is left to the caller.
// Note that this restarts the iteration cursor of the sub-area list.
List *
SplitMatches::JoinedLists()
{
    // Always hand back a brand-new list, so there is no need to worry
    // about mySubAreas being dangling or null.
    List *merged = new List();

    mySubAreas->Start_Get();
    for (MatchArea *current = (MatchArea *) mySubAreas->Get_Next();
         current;
         current = (MatchArea *) mySubAreas->Get_Next())
    {
        // The contents are moved "destructively": the sub-area's own
        // list is left empty afterwards.
        List *bucket = current->MatchList();
        merged->AppendList(*bucket);
    }

    return merged;
}