#
# Part of the ht://Dig package   <http://www.htdig.org/>
# Copyright (c) 1999-2004 The ht://Dig Group
# For copyright details, see the file COPYING in your distribution
# or the GNU Library General Public License (LGPL) version 2 or later
# <http://www.gnu.org/copyleft/lgpl.html>
#
# $Id: t_parsing,v 1.4 2004/05/28 13:15:30 lha Exp $
#

# Tests (or should eventually test) the following config attributes:
#	description_meta_tag_names
#	ignore_alt_text
#	max_doc_size
#	max_keywords
#	max_meta_description_length
#	max_description_length
#	max_descriptions
#	max_head_length
#	noindex_end
#	noindex_start
#	external_parsers
#	external_protocols
#	use_meta_description

# Source the shared test helpers with the --start-apache action selected.
# NOTE(review): $testdir, $htdig, $htpurge, $htsearch, set_attr and fail are
# not defined in this script; they are presumably provided by test_functions.
test_functions_action=--start-apache
. ./test_functions

# Working copy of the configuration; the tests below modify this copy
# (through set_attr) rather than the stock htdig.conf.
config=$testdir/conf/htdig.conf.tmp

# Scratch file that receives htsearch output in try().  Created with mktemp
# so the name is not predictable (the old /tmp/t_htsearch$$ name could be
# pre-created or symlinked by another user of a shared /tmp).
tmp=$(mktemp "${TMPDIR:-/tmp}/t_parsing.XXXXXX") \
    || fail "Couldn't create temporary file"

# set up config file with chosen non-default values
cp "$testdir/conf/htdig.conf" "$config" \
    || fail "Couldn't copy $testdir/conf/htdig.conf to $config"

# try COMMENT QUERY PATTERN...
#
# Runs $htsearch with the configuration file $config on QUERY, storing its
# standard output in $tmp, then checks that every PATTERN (a grep regular
# expression) matches at least one line of that output.
#
# For each PATTERN that does not match: htsearch is run again with -v
# (its standard output is discarded), "Output doesn't match ..." is printed,
# and fail is called with the command line followed by COMMENT.
#
# Globals read:    htsearch, config, tmp
# Globals written: comment, query, pattern (plain sh, so no `local')
try() {
    comment="$1"
    shift
    query="$1"
    shift

    # $htsearch itself stays unquoted, as in the rest of the suite
    # (NOTE(review): it may expand to a command plus arguments -- confirm).
    # $config and $tmp are quoted so paths containing whitespace survive.
    $htsearch -c "$config" "$query" > "$tmp"

    # `for pattern' with no list iterates over the remaining arguments.
    for pattern
    do
        # -e keeps a pattern that begins with `-' from being parsed as a
        # grep option.
        if grep -e "$pattern" "$tmp" > /dev/null
        then :
        else
            $htsearch -v -c "$config" "$query" > /dev/null
            echo "Output doesn't match \"$pattern\""
            fail "$htsearch -c $config '$query' >> $tmp --
$comment"
        fi
    done
}

# Tests (or should eventually test) the following config attributes:
#	description_meta_tag_names
#	ignore_alt_text
#	max_doc_size
#	max_keywords
#	max_meta_description_length
#	max_description_length		(May put in t_templates)
#	max_descriptions		(May put in t_templates)
#	max_head_length
#	noindex_end
#	noindex_start
#	external_parsers		(TODO)
#	external_protocols
#	use_meta_description

# First pass: dig and purge with the configuration as copied from
# htdig.conf, before any set_attr call in this script.  These searches give
# the baseline results that the later passes are compared against.
# "$@" forwards any arguments given to this script on to htdig.
$htdig "$@" -t -i -c "$config" || fail "Couldn't do first dig"
$htpurge -c "$config" || fail "Couldn't do first purge"

try "Search for alt text 'earth'" \
    "words=earth" \
    '1 matches' 'site3.html'

try "'claims and collections', unlimited doc size" \
    "words=%22claims+and+collections%22" \
    '1 matches' 'site4.html'

try "Search for keyword 'martial', default max_keywords" \
    "words=martial" \
    '1 matches' 'site2.html'

# The label used to say 'service' although the query is "technical"; it now
# names the word actually searched for, so a failure report is not misleading.
try "Search for 'technical', default noindex_start/end" \
    "words=technical" \
    '1 matches' 'site%201.html'

# Set after the first dig and followed directly by a search, with no re-dig.
set_attr use_meta_description true
try "Search for 'call handling' with default max_meta_description_length" \
    "words=%22call+handling%22" \
    '1 matches' 'script.html' 'call handling.*signalling'

# Second pass: switch the attributes under test to non-default values,
# rebuild the index, and check that each search result changes accordingly.
set_attr ignore_alt_text true
set_attr max_doc_size 15112
set_attr max_keywords 5
set_attr noindex_start "'Software Distribution'"
set_attr noindex_end "'Contact Information'"
set_attr max_meta_description_length 80
set_attr description_meta_tag_names "description generator"
set_attr max_head_length 30

$htdig "$@" -t -i -c "$config" || fail "Couldn't do second dig"
$htpurge -c "$config" || fail "Couldn't do second purge"

try "Search for alt text 'earth' with ignore_alt_text=true" \
    "words=earth" \
    'No matches'

try "'claims and collections', max_doc_size 15112" \
    "words=%22claims+and+collections%22" \
    '1 matches' 'site4.html'

# (Martial is 6th keyword listed in site 2, but "Fu" is too short and omitted.)
try "Search for keyword 'martial', max_keywords = 5" \
    "words=martial" \
    'No matches'

# Only occurrence of "technical" is between noindex_start and _end in site 1
try "Search for 'technical', noindex_start=Software Distribution, noindex_end=Contact Information" \
    "words=technical" \
    'No matches'

# Visitor occurs after noindex_end
try "Search for 'visitor', noindex_start=Software Distribution, noindex_end=Contact Information" \
    "words=visitor" \
    '2 matches' 'site%201.html' 'site3.html'

# Displaying meta description instead of excerpt, check it is truncated
try "Search for 'call handling' with max_meta_description_length=80" \
    "words=%22call+handling%22" \
    '1 matches' 'script.html' 'means of<br>'

# Check <meta name="generator"...> counts as a description
try "Search for 'category', description_meta_tag_names includes 'generator'" \
    "words=category" \
    '1 matches' 'site3.html' 'FrontPage'

# Check that only specified number of bytes of header is stored.
# Header size is rounded up to contain the whole of the last word.
try "Search for 'also', max_head_length=30" \
    "words=also" \
    '4 matches' 'bad_local.htm' 'site2.html' 'script.html' 'site4.html' \
    'WHERE.*Copyright<br>'

# Third pass: a smaller max_doc_size (15042 instead of 15112), one more
# keyword allowed (6 instead of 5), and the noindex markers written in
# lower case.
set_attr max_doc_size 15042
set_attr max_keywords 6
set_attr noindex_start "'software distribution'"
set_attr noindex_end "'contact information'"

$htdig "$@" -t -i -c "$config" || fail "Couldn't do third dig"
$htpurge -c "$config" || fail "Couldn't do third purge"

try "Search for keyword 'martial', max_keywords = 6" \
    "words=martial" \
    '1 matches' 'site2.html'

try "'claims and collections', max_doc_size 15042" \
    "words=%22claims+and+collections%22" \
    'No matches'

# Check noindex_start/end are case-insensitive
try "Search for 'technical', noindex_start=software distribution, noindex_end=contact information" \
    "words=technical" \
    'No matches'

# external_protocols: register a small handler script for the made-up
# "echo:" scheme and check that a document served through it is indexed.
#
# The handler prints the lines "s 200" and "t text/html", an empty line,
# and then an HTML document whose body is the handler's second argument.
# NOTE(review): presumably $2 is the URL being fetched and the first two
# lines are the status and content type htdig expects -- confirm against
# the external_protocols documentation.
PROTOCOL=my-protocol

# Quoted delimiter: the handler text is written literally, so $2 is expanded
# when the handler runs, not here.
cat > "$PROTOCOL" <<'EOF'
#!/bin/sh
echo "s 200"
echo "t text/html"
echo
echo "<html>$2</html>"
EOF
chmod 755 "$PROTOCOL"

# The handler is registered by absolute path ($PWD/my-protocol).
set_attr external_protocols "echo: $PWD/$PROTOCOL"
set_attr start_url "echo:foo.html"

# This pass runs htdig only; there is no htpurge step here.
$htdig "$@" -t -i -c "$config" || fail "Couldn't do fourth dig"

try "trying external protocol echo" \
    "words=foo" \
    "1 matches" "echo:foo.html"

# Source the shared helpers again, this time with the --stop-apache action.
test_functions_action=--stop-apache
. ./test_functions

# Remove the htsearch scratch file and the generated protocol handler.
# Operands are quoted and preceded by -- so unusual names cannot be split
# or mistaken for options.
rm -f -- "$tmp" "$PROTOCOL"

exit 0