#
# Part of the ht://Dig package
# Copyright (c) 1999-2004 The ht://Dig Group
# For copyright details, see the file COPYING in your distribution
# or the GNU Library General Public License (LGPL) version 2 or later
#
#
# $Id: t_search,v 1.4 2004/05/28 13:15:30 lha Exp $
#
# Source the helper file test_functions from the current directory
# (presumably helpers shared by the test scripts -- its content is not
# visible from here).
. ./test_functions
# Point MIFLUZ_CONFIG at mifluz-search.conf in the source directory and
# export it so that the programs started below inherit it.
MIFLUZ_CONFIG=${srcdir}/mifluz-search.conf
export MIFLUZ_CONFIG
#
# Test the query parser
#
#
# runparser COMMAND EXPECTED
#
# Evaluate COMMAND ($1) with eval and compare everything it writes on
# standard output with EXPECTED ($2). When they are identical the function
# returns silently; otherwise it prints both strings and terminates the
# whole script with exit status 1.
# The global variables command, expected and out are overwritten.
#
runparser() {
  command=$1
  expected=$2
  out=$(eval "$command")
  # Nothing to report when the output is exactly what was expected.
  test "$expected" = "$out" && return 0
  # The message spans several lines; they are kept at column 0 so that no
  # extra leading blanks end up in the report.
  echo "running $command: expected
$expected
but got
$out"
  exit 1
}
#
# Simple test
#
runparser "./search -n -f '( and scope1 the world )' $VERBOSE" \
'( and "scope1" the world )'
#
# All boolean constructions
#
runparser "./search -n -f '( and scope1 ( not scope2 the ) world ( or scope3 is coming to ( near scope4 an ( literal scope5 end ) ) ) )' $VERBOSE" \
'( and "scope1" ( not "scope2" the ) world ( or "scope3" is coming to ( near "scope4" an ( literal "scope5" end ) ) ) )'
#
# Mandatory and forbidden nodes (the query keyword itself is spelled 'forbiden')
#
runparser "./search -n -f '( or scope1 ( mandatory scope2 the ) world ( forbiden scope3 is ) )' $VERBOSE" \
'( or "scope1" ( mandatory "scope2" the ) world ( not "scope3" is ) )'
#
# Test the WordExclude* classes
#
./search -e || exit 1
#
# Run queries with various operators on an index built from the content
# of search.txt.
#
# The redirection target is quoted so that it stays a single word even if
# $srcdir contains whitespace (bash reports an unquoted multi-word target
# as an ambiguous redirect).
# $VERBOSE is left unquoted, presumably on purpose: when it is empty it has
# to disappear instead of being passed as an empty argument.
./txt2mifluz $VERBOSE < "$srcdir/search.txt"
#
# runsearch COMMAND EXPECTED
#
# Evaluate COMMAND ($1) with eval and split its standard output in two:
#   - the lines starting with "match:" are collected and compared with
#     EXPECTED ($2); if they differ, both are printed and the whole script
#     terminates with exit status 1;
#   - the lines starting with "context:" are stored in the global variable
#     context, without the "context:" prefix and the blanks following it,
#     so that a later call can hand the value back to ./search with -C.
# When VERBOSE is not empty the command is shown on stderr before it runs.
# The global variables command, expected, out, match and context are
# overwritten by every call.
#
runsearch() {
command="$1"
expected="$2"
if [ "$VERBOSE" ]
then
echo "running $command" >&2
fi
out=`eval "$command"`
# printf '%s\n' is used instead of echo to feed the filters: what echo does
# with backslashes or with a string that looks like an option depends on
# the shell, and the program output must reach grep and sed unchanged.
match=`printf '%s\n' "$out" | grep '^match:'`
context=`printf '%s\n' "$out" | sed -n -e 's/^context: *//p'`
# echo "context: $context" >&2
if [ "$expected" != "$match" ]
then
echo "running $command: expected
$expected
but got
$match"
exit 1
fi
}
#
# Test context restoration on WordTreeLiteral
#
runsearch "./search -c 1 -f 'lazy' $VERBOSE" \
'match: 0 1 11 '
runsearch "./search -c 1 -C '$context' -f 'lazy' $VERBOSE" \
'match: 0 1 21 '
#
# Literal search using scope : only want documents with Flags set to 5
# Be careful to use tab characters in the scope.
#
runsearch "./search -f '( literal \" 5 \" lazy )' $VERBOSE" \
'match: 0 5 9
match: 0 5 21
match: 0 5 53
match: 0 5 56 '
#
# And search using scope : only want documents with Flags set to 5
# Be careful to use tab characters in the scope.
#
runsearch "./search -f '( and \"\" ( literal \" 5 \" lazy ) dog )' $VERBOSE" \
'match: 0 5 21 (dog lazy )
match: 0 5 53 (dog lazy )
match: 0 5 56 (dog lazy )'
#
# And/Not : document 20 is excluded because it contains 'an'
#
runsearch "./search -f '( and \"\" world ( not \"\" an ) the )' $VERBOSE" \
'match: 0 0 5 (world the )
match: 0 0 21 (world the )
match: 0 0 51 (world the )
match: 0 0 71 (world the )
match: 0 0 81 (world the )'
#
# Or/Not : document 20 is excluded because it contains 'an'
#
runsearch "./search -f '( or \"\" world ( not \"\" an ) the )' $VERBOSE" \
'match: 0 0 3 (the )
match: 0 0 5 (world the )
match: 0 0 11 (world )
match: 0 0 21 (world the )
match: 0 0 51 (world the )
match: 0 0 71 (world the )
match: 0 0 81 (world the )'
#
# Or : each word matches only once in separate documents
#
runsearch "./search -c 2 -f '( or \"\" comes end )' $VERBOSE" \
'match: 0 0 6 (comes )
match: 0 0 20 (end )'
#
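# Or : 'comes' and 'end' each match only once, in separate documents.
# docid 20 contains 'the' and 'end', therefore it ranks first;
# docid 6 contains 'comes' and ranks second, before any document
# containing 'the' alone, because 'comes' is less frequent than 'the';
# the other docids only contain 'the'.
# NOTE(review): the expected output below lists the matches in ascending
# docid order, not in the ranking described above -- confirm which one
# is intended.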
#
runsearch "./search -c 8 -f '( or \"\" the comes end )' $VERBOSE" \
'match: 0 0 3 (the )
match: 0 0 5 (the )
match: 0 0 6 (comes )
match: 0 0 20 (the end )
match: 0 0 21 (the )
match: 0 0 51 (the )
match: 0 0 71 (the )
match: 0 0 81 (the )'
#
# Run the same search in 3 steps, using the context to resume the search
#
runsearch "./search -c 2 -f '( or \"\" the comes end )' $VERBOSE" \
'match: 0 0 3 (the )
match: 0 0 5 (the )'
runsearch "./search -c 2 -C '$context' -f '( or \"\" the comes end )' $VERBOSE" \
'match: 0 0 6 (comes )
match: 0 0 20 (the end )'
runsearch "./search -c 5 -C '$context' -f '( or \"\" the comes end )' $VERBOSE" \
'match: 0 0 21 (the )
match: 0 0 51 (the )
match: 0 0 71 (the )
match: 0 0 81 (the )'
#
# After a search that stopped because there were no matches left, there
# must be no context for resuming: the context variable filled by the
# last runsearch call has to be empty.
#
if [ -n "$context" ]; then
  echo "Expected empty context after fulfilled search"
  exit 1
fi
#
# Or search with a word that is not in the database (klkl)
#
runsearch "./search -f '( or \"\" the klkl )' $VERBOSE" \
'match: 0 0 3 (the )
match: 0 0 5 (the )
match: 0 0 20 (the )
match: 0 0 21 (the )
match: 0 0 51 (the )
match: 0 0 71 (the )
match: 0 0 81 (the )'
#
# Compound boolean query: nested 'and'
#
runsearch "./search -f '( and \"\" the ( and \"\" an end ) )' $VERBOSE" \
'match: 0 0 20 (an end the )'
#
# Compound boolean query: nested 'and' that fails immediately
# because 'foo' is not in the inverted index.
#
runsearch "./search -f '( and \"\" the ( and \"\" an foo ) )' $VERBOSE" \
'match: none'
#
# Compound boolean query: 'or' & 'and'
#
runsearch "./search -f '( and \"\" the ( or \"\" comes end ) )' $VERBOSE" \
'match: 0 0 20 (end the )'
runsearch "./search -f '( or \"\" comes ( and \"\" the world ) )' $VERBOSE" \
'match: 0 0 5 (world the )
match: 0 0 6 (comes )
match: 0 0 20 (world the )
match: 0 0 21 (world the )
match: 0 0 51 (world the )
match: 0 0 71 (world the )
match: 0 0 81 (world the )'
runsearch "./search -P 1 -f '( or \"\" comes ( near \"\" lazy dog ) )' $VERBOSE" \
'match: 0 0 6 (comes )
match: 0 1 11 (lazy dog proximity )
match: 0 1 21 (lazy dog proximity )
match: 0 5 21 (lazy dog proximity )
match: 0 5 56 (lazy dog proximity )'
#
# Compound boolean query: nested 'or', first without a limit, then
# limited with -c 1 and resumed with -c 4 using the saved context
#
runsearch "./search -f '( or \"\" comes ( or \"\" the world ) )' $VERBOSE" \
'match: 0 0 3 (the )
match: 0 0 5 (the world )
match: 0 0 6 (comes )
match: 0 0 11 (world )
match: 0 0 20 (the world )
match: 0 0 21 (the world )
match: 0 0 51 (the world )
match: 0 0 71 (the world )
match: 0 0 81 (the world )'
runsearch "./search -c 1 -f '( or \"\" comes ( or \"\" the world ) )' $VERBOSE" \
'match: 0 0 3 (the )'
runsearch "./search -c 4 -C '$context' -f '( or \"\" comes ( or \"\" the world ) )' $VERBOSE" \
'match: 0 0 5 (the world )
match: 0 0 6 (comes )
match: 0 0 11 (world )
match: 0 0 20 (the world )'
#
# Compound boolean query: nested 'optional'
#
runsearch "./search -f '( optional \"\" the ( optional \"\" world foo ) )' $VERBOSE" \
'match: 0 0 5 (world the proximity)
match: 0 0 20 (world the )
match: 0 0 21 (world the )
match: 0 0 51 (world the )
match: 0 0 71 (world the )
match: 0 0 81 (world the )
match: 0 0 11 (world )
match: