#!/bin/sh
# Part of the ht://Dig package
# Copyright (c) 1999-2004 The ht://Dig Group
# For copyright details, see the file COPYING in your distribution
# or the GNU Library General Public License (LGPL) version 2 or later
#
#
# $Id: t_factors,v 1.7 2004/06/05 06:26:22 lha Exp $
#
# Tests (or should eventually test) the following config attributes:
# author_factor
# backlink_factor
# caps_factor
# date_factor (TODO)
# description_factor
# heading_factor
# keywords_factor
# meta_description_factor
# multimatch_factor
# search_results_order
# text_factor
# title_factor
# url_seed_score
# url_text_factor
# try_order comment query pattern1 pattern2 ...
#   comment - description of test, displayed if error occurs
#   query   - search string passed to htsearch
#   pattern - strings expected to occur *in order* in the output
#
# Runs "$htsearch -c $config query" with standard output saved in $tmp, then
# scans that output once with $awk.  Each pattern is used as an awk regular
# expression that may match anywhere in a line, and at most one pattern is
# consumed per output line, so every pattern has to be found on a later line
# than the one before it.  The first pattern that is never reached is
# reported and fail is called.
#
# Reads from the caller: $htsearch, $awk, $config, $tmp and the fail function.
# Sets the globals comment, query, array, pattern and miss.
#
# Patterns are pasted into the awk program as string literals, so they must
# not contain double quotes or backslashes.
try_order() {
    comment="$1"
    shift
    query="$1"
    shift

    # The file names are quoted so that paths containing spaces work.
    # $htsearch itself stays unquoted, as elsewhere in this script
    # (NOTE(review): it may hold a command plus arguments -- confirm).
    # Standard error of this first run is discarded.
    $htsearch -c "$config" "$query" > "$tmp" 2> /dev/null

    # Turn the remaining arguments into awk statements filling array[0..n-1].
    array=""
    for pattern
    do
        array="$array; array[i++] = \"$pattern\""
    done

    # "line" is the index of the next pattern wanted.  Once every pattern has
    # been seen array[line] is empty, so END prints an empty line; otherwise
    # END prints the first pattern that was not found.  A failing awk would
    # also leave $miss empty, so its exit status is checked explicitly rather
    # than being mistaken for a successful match.
    miss=$($awk "BEGIN {$array; line = 0; }"'
        $0 ~ ".*" array[line] { line++ }
        END { print array[line] }' < "$tmp") ||
        fail "$awk could not check the output of '$query' --
$comment"

    if [ -n "$miss" ]
    then
        # Repeat the search with -vv; its standard output is thrown away, so
        # only what it writes to standard error is shown.
        $htsearch -vv -c "$config" "$query" > /dev/null
        echo "String \"$miss\" was not found where expected"
        fail "$htsearch -c $config '$query' >> $tmp --
$comment"
    fi
}
# Source the shared test helpers with the start-apache action selected.
# NOTE(review): test_functions is not visible here; it presumably provides
# $testdir, $htdig, $htpurge, $htsearch, $awk, set_attr and fail -- confirm.
test_functions_action=--start-apache
. ./test_functions

# Scratch copy of the configuration (modified by the tests below) and the
# file in which try_order stores htsearch output.
config=$testdir/conf/htdig.conf.tmp
tmp=/tmp/t_htsearch$$

# set up config file with chosen non-default values
# A failed copy is reported directly rather than surfacing later as a
# confusing dig error.
cp "$testdir/conf/htdig.conf" "$config" ||
    fail "Couldn't copy $testdir/conf/htdig.conf to $config"

# Build the database once; the script's own arguments are passed to htdig.
$htdig "$@" -t -i -c "$config" || fail "Couldn't dig"
$htpurge -c "$config" || fail "Couldn't purge"
# Baseline: order of the four 'also' matches with the configuration as copied.
try_order "Search for 'also'" \
    "words=also" \
    '4 matches' 'site2.html' 'site4.html' 'bad_local.htm' 'script.html'

# url_seed_score: boosting URLs matching "site4" should move site4.html to
# the top while the other results keep their relative order.
set_attr url_seed_score "site4 *1000+1000"
try_order "Seed score 1000 for site4.html" \
    "words=also" \
    '4 matches' 'site4.html' 'site2.html' 'bad_local.htm' 'script.html'

# Two separate pattern/formula pairs: site4.html and script.html lead.
set_attr url_seed_score "site4 *1000+1000 script *1000+1000"
try_order "Seed score 1000 for site4.html and script.html" \
    "words=also" \
    '4 matches' 'site4.html' 'script.html' 'site2.html' 'bad_local.htm'

# The same boost written as one alternation pattern must give the same order.
set_attr url_seed_score "site4|script *1000+1000"
try_order "Seed score 1000 for site4|script" \
    "words=also" \
    '4 matches' 'site4.html' 'script.html' 'site2.html' 'bad_local.htm'

# search_results_order: with the seed scores still in effect, listing
# bad_local should bring bad_local.htm to the front.
set_attr search_results_order "bad_local"
try_order "Search_results_order bad_local" \
    "words=also" \
    '4 matches' 'bad_local.htm' 'site4.html' 'script.html' 'site2.html'

# The label now quotes the attribute value exactly as it is set; it used to
# show the first two fields swapped.
set_attr search_results_order "script * e2|e4"
try_order "Search_results_order script * e2|e4" \
    "words=also" \
    '4 matches' 'script.html' 'bad_local.htm' 'site4.html' 'site2.html'

# Clear both attributes so they do not influence the factor tests that follow.
set_attr url_seed_score ""
set_attr search_results_order ""
# Switch every scoring factor off.  caps_factor and url_text_factor are not
# implemented, and date_factor is still TODO.
for factor in \
    author_factor backlink_factor caps_factor date_factor \
    description_factor heading_factor keywords_factor \
    meta_description_factor multimatch_factor text_factor \
    title_factor url_text_factor
do
    set_attr "$factor" 0
done

# With all factors at 0 neither search is expected to return anything.
try_order "Search with factors 0" "words=also" 'No matches'
try_order "Search for 'service' with title_factor 0" "words=service" 'No matches'

# Only the title counts: a single document is expected.
set_attr title_factor 1
try_order "Search for 'service' with title_factor 1" "words=service" \
    '1 matches' 'script.html'

# Body text counts too, but less than the title: script.html stays first.
set_attr text_factor 0.3
try_order "Greater weight to title factor" "words=service" \
    '4 matches' 'script.html' 'site4.html' 'site%201.html' 'site3.html'

# A negative title factor should push script.html to the end.
set_attr title_factor -3.2
try_order "Checking negative title factor" "words=service" \
    '4 matches' 'site4.html' 'site%201.html' 'site3.html' 'script.html'

# Back to all factors 0 for the next group of tests.
set_attr title_factor 0
set_attr text_factor 0
# test with all factors 0 except the one which matches
#
# try_sole_factor attr word document
#   Sets attr to 1, expects a search for word to return exactly one match
#   (document), then sets attr back to 0.
try_sole_factor() {
    set_attr "$1" 1
    try_order "Search for '$2' with $1 1" "words=$2" '1 matches' "$3"
    set_attr "$1" 0
}

try_sole_factor description_factor      crossRef 'site%201.html'
try_sole_factor author_factor           media    'script.html'
try_sole_factor meta_description_factor stars    'site2.html'
try_sole_factor heading_factor          obtain   'bad_local.htm'
try_sole_factor keywords_factor         newWord  'title.html'
# test with all document-based factors non-zero except the one which matches
# First switch every document-based factor on; each test below then turns
# off just the factor under test and restores it before the next test.
set_attr author_factor 1
#set_attr backlink_factor 1 # not document based
set_attr caps_factor 1
#set_attr date_factor 1 # not document based
set_attr description_factor 1
set_attr heading_factor 1
set_attr keywords_factor 1
set_attr meta_description_factor 1
set_attr multimatch_factor 1
set_attr text_factor 1
set_attr title_factor 1
set_attr url_text_factor 1
# NOTE(review): description_factor was already set to 1 above and is set to
# 0 on the very next line, so this call looks redundant -- confirm against
# set_attr before removing it.
set_attr description_factor 1
# Unlike the words in the tests that follow, 'crossRef' is still expected to
# match one document (title.html) when description_factor is 0.
set_attr description_factor 0
try_order "Search for 'crossRef' with description_factor 0" \
"words=crossRef" \
'1 matches' 'title.html'
# Restore description_factor, then disable author_factor: 'media' should no
# longer match anything.
set_attr description_factor 1
set_attr author_factor 0
try_order "Search for 'media' with author_factor 0" \
"words=media" \
'No matches'
# Restore author_factor, then disable meta_description_factor.
set_attr author_factor 1
set_attr meta_description_factor 0
try_order "Search for 'stars' with meta_description_factor 0" \
"words=stars" \
'No matches'
# Restore meta_description_factor, then disable heading_factor.
set_attr meta_description_factor 1
set_attr heading_factor 0
try_order "Search for 'obtain' with heading_factor 0" \
"words=obtain" \
'No matches'
# Restore heading_factor, then disable keywords_factor.
set_attr heading_factor 1
set_attr keywords_factor 0
try_order "Search for 'newWord' with keywords_factor 0" \
"words=newWord" \
'No matches'
# Leave keywords_factor switched back on for the tests that follow.
set_attr keywords_factor 1
# multimatch_factor gives a "boost" to searches matching multiple terms
set_attr title_factor 10	# "get" in title of bad_local
multimatch_query="words=get+interest+repay;method=or"

# With a large boost, site4.html is expected ahead of bad_local.htm.
set_attr multimatch_factor 10000
try_order "Search for 'get or interest or repay' with multimatch_factor 10000" \
    "$multimatch_query" '2 matches' 'site4.html' 'bad_local.htm'

# Without the boost the order is expected to flip.
set_attr multimatch_factor 0
try_order "Search for 'get or interest or repay' with multimatch_factor 0" \
    "$multimatch_query" '2 matches' 'bad_local.htm' 'site4.html'

# backlink counts the number of references (of any type) to this document
backlink_query="words=suggestions+repay+interest;method=or"

# Backlinks ignored: site4.html is expected first.
set_attr backlink_factor 0
try_order "site4.html has repay+interest, site 1.html only has suggestions" \
    "$backlink_query" '2 matches' 'site4.html' 'site%201.html'

# Backlinks weighted heavily: 'site 1.html' is expected to overtake it.
set_attr backlink_factor 100
try_order "site 1.html has a higher ratio of backlinks to outgoing links" \
    "$backlink_query" '2 matches' 'site%201.html' 'site4.html'
# try_order's scratch file is no longer needed once the last check has run;
# remove it so repeated runs do not leave /tmp/t_htsearch* files behind.
rm -f "$tmp"

# Source the shared helpers again, this time with the stop-apache action.
test_functions_action=--stop-apache
. ./test_functions