#
# Part of the ht://Dig package
# Copyright (c) 1999-2004 The ht://Dig Group
# For copyright details, see the file COPYING in your distribution
# or the GNU Library General Public License (LGPL) version 2 or later
#
#
# $Id: t_search,v 1.4 2004/05/28 13:15:30 lha Exp $
#

. ./test_functions

# Configuration file used by every command below; it lives in the source
# directory.  Quoted so that a source path containing blanks still works.
MIFLUZ_CONFIG="${srcdir}/mifluz-search.conf"
export MIFLUZ_CONFIG

#
# Test the query parser
#

#
# runparser COMMAND EXPECTED
#
#   COMMAND   ($1) shell command line, run through eval.
#   EXPECTED  ($2) exact standard output COMMAND must produce.
#
# Prints a diagnostic and exits with status 1 when the output differs.
# Note: command, expected and out are plain (global) shell variables.
#
runparser() {
    command="$1"
    expected="$2"
    out=`eval "$command"`
    if [ "$expected" != "$out" ]
    then
        echo "running $command: expected $expected but got $out"
        exit 1
    fi
}

#
# Simple test: the parsed query is printed back with the scope quoted.
#
runparser "./search -n -f '( and scope1 the world )' $VERBOSE" \
'( and "scope1" the world )'

#
# All boolean constructions
#
runparser "./search -n -f '( and scope1 ( not scope2 the ) world ( or scope3 is coming to ( near scope4 an ( literal scope5 end ) ) ) )' $VERBOSE" \
'( and "scope1" ( not "scope2" the ) world ( or "scope3" is coming to ( near "scope4" an ( literal "scope5" end ) ) ) )'

#
# Mandatory and forbidden nodes.  The query keyword really is spelled
# 'forbiden'; such a node is printed back as 'not'.
#
runparser "./search -n -f '( or scope1 ( mandatory scope2 the ) world ( forbiden scope3 is ) )' $VERBOSE" \
'( or "scope1" ( mandatory "scope2" the ) world ( not "scope3" is ) )'

#
# Test the WordExclude* classes
#
./search -e || exit 1

#
# Run queries with various operators on an index built from the content
# of search.txt.  Stop right here if the index cannot be built: every
# later test would otherwise fail with a misleading "expected ... but got"
# message.
#
./txt2mifluz $VERBOSE < "$srcdir/search.txt" || exit 1

#
# Run $1 and expect $2 as a result (all lines starting with match:).
# Feed the context variable with the output lines starting with context:,
# stripping context: itself.
#
# runsearch COMMAND EXPECTED
#
#   COMMAND   ($1) shell command line, run through eval.
#   EXPECTED  ($2) the "match:" lines COMMAND must print, one record
#             per line.
#
# Side effect: the global variable `context' receives every output line
# starting with "context:", with that prefix and the blanks following it
# removed.  Later tests hand it back to ./search with -C to resume a search.
#
# Blanks at the end of a line are ignored on both sides of the comparison,
# so the expected literals do not depend on invisible trailing characters.
# Prints a diagnostic and exits with status 1 on a mismatch.
#
runsearch() {
    command="$1"
    expected="$2"
    if [ "$VERBOSE" ]
    then
        echo "running $command" >&2
    fi
    out=`eval "$command"`
    # printf instead of echo: echo may interpret backslashes or a leading
    # dash in the captured output, depending on the shell.
    match=`printf '%s\n' "$out" | grep '^match:' | sed -e 's/ *$//'`
    context=`printf '%s\n' "$out" | sed -n -e 's/^context: *//p'`
    # echo "context: $context" >&2
    wanted=`printf '%s\n' "$expected" | sed -e 's/ *$//'`
    if [ "$wanted" != "$match" ]
    then
        echo "running $command: expected $expected but got $match"
        exit 1
    fi
}

#
# Test context restoration on WordTreeLiteral
#
runsearch "./search -c 1 -f 'lazy' $VERBOSE" \
'match: 0 1 11 '

runsearch "./search -c 1 -C '$context' -f 'lazy' $VERBOSE" \
'match: 0 1 21 '

#
# Literal search using scope : only want documents with Flags set to 5
# Be careful to use tabulation in scope.
# NOTE(review): the blanks around 5 below are presumably meant to be TAB
# characters -- confirm they have not been turned into spaces.
#
runsearch "./search -f '( literal \" 5 \" lazy )' $VERBOSE" \
'match: 0 5 9 
match: 0 5 21 
match: 0 5 53 
match: 0 5 56 '

#
# And search using scope : only want documents with Flags set to 5
# Be careful to use tabulation in scope (same remark as above).
#
runsearch "./search -f '( and \"\" ( literal \" 5 \" lazy ) dog )' $VERBOSE" \
'match: 0 5 21 (dog lazy )
match: 0 5 53 (dog lazy )
match: 0 5 56 (dog lazy )'

#
# And/Not : document 20 is excluded because it contains 'an'
#
runsearch "./search -f '( and \"\" world ( not \"\" an ) the )' $VERBOSE" \
'match: 0 0 5 (world the )
match: 0 0 21 (world the )
match: 0 0 51 (world the )
match: 0 0 71 (world the )
match: 0 0 81 (world the )'

#
# Or/Not : document 20 is excluded because it contains 'an'
#
runsearch "./search -f '( or \"\" world ( not \"\" an ) the )' $VERBOSE" \
'match: 0 0 3 (the )
match: 0 0 5 (world the )
match: 0 0 11 (world )
match: 0 0 21 (world the )
match: 0 0 51 (world the )
match: 0 0 71 (world the )
match: 0 0 81 (world the )'

#
# Or : each word matches only once in separate documents
#
runsearch "./search -c 2 -f '( or \"\" comes end )' $VERBOSE" \
'match: 0 0 6 (comes )
match: 0 0 20 (end )'

#
# Or with three words, at most 8 documents.  The expected records are
# listed in ascending document order: docid 6 is the one containing
# 'comes', docid 20 contains both 'the' and 'end', the other documents
# only contain 'the'.
#
runsearch "./search -c 8 -f '( or \"\" the comes end )' $VERBOSE" \
'match: 0 0 3 (the )
match: 0 0 5 (the )
match: 0 0 6 (comes )
match: 0 0 20 (the end )
match: 0 0 21 (the )
match: 0 0 51 (the )
match: 0 0 71 (the )
match: 0 0 81 (the )'

#
# Run the same search in three steps, using context to resume the search
#
runsearch "./search -c 2 -f '( or \"\" the comes end )' $VERBOSE" \
'match: 0 0 3 (the )
match: 0 0 5 (the )'

runsearch "./search -c 2 -C '$context' -f '( or \"\" the comes end )' $VERBOSE" \
'match: 0 0 6 (comes )
match: 0 0 20 (the end )'

runsearch "./search -c 5 -C '$context' -f '( or \"\" the comes end )' $VERBOSE" \
'match: 0 0 21 (the )
match: 0 0 51 (the )
match: 0 0 71 (the )
match: 0 0 81 (the )'

#
# After a search that stopped because there were no matches left, there
# must be no context for resuming.
#
if test "$context" != ""
then
    echo "Expected empty context after fulfilled search"
    exit 1
fi

#
# Or search with a word that is not in the database (klkl)
#
runsearch "./search -f '( or \"\" the klkl )' $VERBOSE" \
'match: 0 0 3 (the )
match: 0 0 5 (the )
match: 0 0 20 (the )
match: 0 0 21 (the )
match: 0 0 51 (the )
match: 0 0 71 (the )
match: 0 0 81 (the )'

#
# Compound boolean query: nested 'and'
#
runsearch "./search -f '( and \"\" the ( and \"\" an end ) )' $VERBOSE" \
'match: 0 0 20 (an end the )'

#
# Compound boolean query: nested 'and' that fails immediately
# because 'foo' is not in the inverted index.
#
runsearch "./search -f '( and \"\" the ( and \"\" an foo ) )' $VERBOSE" \
'match: none'

#
# Compound boolean query: 'or' & 'and'
#
runsearch "./search -f '( and \"\" the ( or \"\" comes end ) )' $VERBOSE" \
'match: 0 0 20 (end the )'

runsearch "./search -f '( or \"\" comes ( and \"\" the world ) )' $VERBOSE" \
'match: 0 0 5 (world the )
match: 0 0 6 (comes )
match: 0 0 20 (world the )
match: 0 0 21 (world the )
match: 0 0 51 (world the )
match: 0 0 71 (world the )
match: 0 0 81 (world the )'

runsearch "./search -P 1 -f '( or \"\" comes ( near \"\" lazy dog ) )' $VERBOSE" \
'match: 0 0 6 (comes )
match: 0 1 11 (lazy dog proximity )
match: 0 1 21 (lazy dog proximity )
match: 0 5 21 (lazy dog proximity )
match: 0 5 56 (lazy dog proximity )'

#
# Compound boolean query: nested 'or'.  First without any limit, then
# one document only (-c 1), then the four following documents, resuming
# from the context left by the previous call (-C).
#
query='( or "" comes ( or "" the world ) )'

runsearch "./search -f '$query' $VERBOSE" \
'match: 0 0 3 (the )
match: 0 0 5 (the world )
match: 0 0 6 (comes )
match: 0 0 11 (world )
match: 0 0 20 (the world )
match: 0 0 21 (the world )
match: 0 0 51 (the world )
match: 0 0 71 (the world )
match: 0 0 81 (the world )'

runsearch "./search -c 1 -f '$query' $VERBOSE" \
'match: 0 0 3 (the )'

runsearch "./search -c 4 -C '$context' -f '$query' $VERBOSE" \
'match: 0 0 5 (the world )
match: 0 0 6 (comes )
match: 0 0 11 (world )
match: 0 0 20 (the world )'

#
# Compound boolean query: nested 'optional'
#
runsearch "./search -f '( optional \"\" the ( optional \"\" world foo ) )' $VERBOSE" \
'match: 0 0 5 (world the proximity)
match: 0 0 20 (world the )
match: 0 0 21 (world the )
match: 0 0 51 (world the )
match: 0 0 71 (world the )
match: 0 0 81 (world the )
match: 0 0 11 (world )
match: 0 0 3 (the )'

#
# Simplest search : a single word.
# Every expected record below ends with one blank.
#
runsearch "./search -f 'the' $VERBOSE" \
'match: 0 0 3 
match: 0 0 5 
match: 0 0 20 
match: 0 0 21 
match: 0 0 51 
match: 0 0 71 
match: 0 0 81 '

#
# The same 'and' query is used for the five paging tests that follow.
#
query='( and "" the world )'

#
# Get all we can
#
runsearch "./search -f '$query' $VERBOSE" \
'match: 0 0 5 (world the )
match: 0 0 20 (world the )
match: 0 0 21 (world the )
match: 0 0 51 (world the )
match: 0 0 71 (world the )
match: 0 0 81 (world the )'

#
# First two
#
runsearch "./search -c 2 -f '$query' $VERBOSE" \
'match: 0 0 5 (world the )
match: 0 0 20 (world the )'

#
# The next two
#
runsearch "./search -b 2 -c 2 -f '$query' $VERBOSE" \
'match: 0 0 21 (world the )
match: 0 0 51 (world the )'

#
# First four
#
runsearch "./search -c 4 -f '$query' $VERBOSE" \
'match: 0 0 5 (world the )
match: 0 0 20 (world the )
match: 0 0 21 (world the )
match: 0 0 51 (world the )'

#
# Next document, using the context of the last document returned
#
runsearch "./search -c 1 -C '$context' -f '$query' $VERBOSE" \
'match: 0 0 71 (world the )'

#
# Implicit or : each word matches only once in separate documents
#
runsearch "./search -c 2 -f '( optional \"\" comes end )' $VERBOSE" \
'match: 0 0 6 (comes )
match: 0 0 20 (end )'

#
# Implicit or with three words:
# docid 20 contains 'the' and 'end', therefore comes first;
# docid 6 contains 'comes' and is second, before any document
# containing 'the' alone, because 'comes' is less frequent than 'the';
# the other docids only contain 'the'.
#
query='( optional "" the comes end )'

runsearch "./search -c 8 -f '$query' $VERBOSE" \
'match: 0 0 20 (end the proximity)
match: 0 0 6 (comes )
match: 0 0 3 (the )
match: 0 0 5 (the )
match: 0 0 21 (the )
match: 0 0 51 (the )
match: 0 0 71 (the )
match: 0 0 81 (the )'

#
# Run the same search in three steps, using context to resume the search
#
runsearch "./search -c 2 -f '$query' $VERBOSE" \
'match: 0 0 20 (end the proximity)
match: 0 0 6 (comes )'

runsearch "./search -c 2 -C '$context' -f '$query' $VERBOSE" \
'match: 0 0 3 (the )
match: 0 0 5 (the )'

runsearch "./search -c 5 -C '$context' -f '$query' $VERBOSE" \
'match: 0 0 21 (the )
match: 0 0 51 (the )
match: 0 0 71 (the )
match: 0 0 81 (the )'

#
# After a search that stopped because there were no matches left, there
# must be no context for resuming.
#
if [ -n "$context" ]
then
    echo "Expected empty context after fulfilled search"
    exit 1
fi

#
# Implicit or search with a word that is not in the database (klkl)
#
runsearch "./search -f '( optional \"\" the klkl )' $VERBOSE" \
'match: 0 0 3 (the )
match: 0 0 5 (the )
match: 0 0 20 (the )
match: 0 0 21 (the )
match: 0 0 51 (the )
match: 0 0 71 (the )
match: 0 0 81 (the )'

#
# And search with a word that is not in the database (klkl)
#
runsearch "./search -c 1 -f '( and \"\" comes klkl )' $VERBOSE" \
'match: none'

#
# From there we deal with more complex keys (TAG,SERVER,URL)
# instead of URL alone above.
#

#
# And search with 'dog lazy'
#
runsearch "./search -c 3 -f '( and \"\" dog lazy )' $VERBOSE" \
'match: 0 1 11 (dog lazy )
match: 0 1 21 (dog lazy )
match: 0 5 21 (dog lazy )'

#
# And search with 'dog lazy', one URL per server only (-S)
#
runsearch "./search -S -f '( and \"\" dog lazy )' $VERBOSE" \
'match: 0 1 11 (dog lazy )
match: 0 5 21 (dog lazy )
match: 0 6 1 (dog lazy )'

#
# Implicit or search with 'dog lazy', one URL per server only (-S)
#
runsearch "./search -S -f '( optional \"\" dog lazy )' $VERBOSE" \
'match: 0 1 11 (dog lazy proximity)
match: 0 5 21 (dog lazy )
match: 0 6 1 (dog lazy )
match: 0 5 9 (lazy )'

#
# Near search with 'lazy dog'
#
runsearch "./search -f '( near \"\" lazy dog )' $VERBOSE" \
'match: 0 1 11 (lazy dog proximity)
match: 0 1 21 (lazy dog proximity)
match: 0 5 21 (lazy dog proximity)
match: 0 5 56 (lazy dog proximity)'

#
# The next four tests run the same near query, 'dog lazy', and only vary
# the proximity tolerance given with -P.
#
query='( near "" dog lazy )'

#
# Near search with 'dog lazy'
#
runsearch "./search -f '$query' $VERBOSE" \
'match: 0 1 11 (dog lazy proximity)'

#
# Near search with 'dog lazy', order of terms is not meaningful:
# matching 'dog lazy' and 'lazy dog'
#
runsearch "./search -P -1 -f '$query' $VERBOSE" \
'match: 0 1 11 (dog lazy proximity)
match: 0 1 21 (dog lazy proximity)
match: 0 5 21 (dog lazy proximity)
match: 0 5 56 (dog lazy proximity)'

#
# Near search with 'dog lazy', order of terms is not meaningful,
# tolerance is -2, adding matches for 'dog ? lazy' and 'lazy ? dog'
#
runsearch "./search -P -2 -f '$query' $VERBOSE" \
'match: 0 1 11 (dog lazy proximity)
match: 0 1 21 (dog lazy proximity)
match: 0 5 21 (dog lazy proximity)
match: 0 5 53 (dog lazy proximity)
match: 0 5 56 (dog lazy proximity)'

#
# Near search with 'dog lazy', order of terms is meaningful,
# tolerance is 3, adding matches for 'dog ? lazy' and 'dog ? ? lazy'
#
runsearch "./search -P 3 -f '$query' $VERBOSE" \
'match: 0 1 11 (dog lazy proximity)
match: 0 6 1 (dog lazy proximity)'

#
# Near search with 'lazy dog', only the first 2
#
query='( near "" lazy dog )'

runsearch "./search -c 2 -f '$query' $VERBOSE" \
'match: 0 1 11 (lazy dog proximity)
match: 0 1 21 (lazy dog proximity)'

#
# Near search with 'lazy dog', resume from the previous search
# and get 2 more.
#
runsearch "./search -c 2 -C '$context' -f '$query' $VERBOSE" \
'match: 0 5 21 (lazy dog proximity)
match: 0 5 56 (lazy dog proximity)'

#
# Near search with a non-existent word
#
runsearch "./search -f '( near \"\" lazy bar )' $VERBOSE" \
'match: none'

#
# The last two tests run an implicit or on 'lazy dog' with proximity
# (document 0 5 53 contains lazy ? dog).
#
query='( optional "" lazy dog )'

#
# Order of terms is meaningful.
#
runsearch "./search -P 2 -f '$query' $VERBOSE" \
'match: 0 1 11 (dog lazy proximity)
match: 0 1 21 (dog lazy )
match: 0 5 21 (dog lazy )
match: 0 5 53 (dog lazy )
match: 0 5 56 (dog lazy )
match: 0 6 1 (dog lazy )
match: 0 5 9 (lazy )'

#
# Order of terms is not meaningful.
#
runsearch "./search -P -2 -f '$query' $VERBOSE" \
'match: 0 1 11 (dog lazy proximity)
match: 0 1 21 (dog lazy proximity)
match: 0 5 21 (dog lazy proximity)
match: 0 5 53 (dog lazy proximity)
match: 0 5 56 (dog lazy proximity)
match: 0 6 1 (dog lazy )
match: 0 5 9 (lazy )'