#!/usr/local/bin/perl
#########################################################
# part of the HTML Dictionary
# Distributed under the GNU copyleft (any version of your choice)
# No part of these documents may be printed in any for-profit publication
# copyleft sunil@magnetic.demon.co.uk
#########################################################
#########################################################
# this script makes use of Glimpse, developed by
# Udi Manber, Burra Gopal: University of Arizona
# Sun Wu : National Chung-Cheng University, Taiwan
# this version makes use of the glimpse server and strips
# out anything that looks like html.
#########################################################
#########################################################
# CONFIGURE THESE
#########################################################
#########################################################
# this little script assumes that the databases have a
# thin and fat sub directory.
#                  databases
#                      |
#       +------+-------+-------+
#       |      |       |       |
#      this   that   other    misc
#       |
#    +-----+----+
#    |          |
#   fat        thin
#    |          |
#  .glimpse_index   .glimpse_index
#  .glimpse_stat..  .glimpse...
#  ....
#
# the fat index should have been indexed using
# glimpseindex -o ....
# glimpseindex -B -f -s ....
#
# two glimpseservers are needed for each database because
# there are two databases (fat and thin).
#########################################################
#########################################################
# *** put sensible things here ***
#-------where does glimpse live?--------------------------
# Site configuration: package globals consumed by the rest of the script.
# $glimpse_bin is derived from $glimpse_dir, so changing the latter moves both.
$glimpse_dir="/usr/local/lib/glimpse";
$glimpse_bin="$glimpse_dir";
#--------information about the database-------------------
# $glimpse_dbs and $db_name are joined later to build the fat/thin index paths.
$glimpse_dbs="/database/directory/";
$db_name="all";
$title="search my server";      # shown in the page header ("Search the $title")
# NOTE(review): presumably the filesystem root of the served documents and
# the URL prefix it maps to -- confirm against the code that uses them.
$doc_root="/usr/share/htdocs/";  # every simple statement needs its terminating ';'
$doc_server="http://my_server:my_port/";
#--------if you are using the glimpseserver---------------
$thinport=3000; #port for glimpseserver on thin database
$fatport=3001; #port for glimpseserver on fat database
$glimpseserver="host"; #where are the glimpse servers?
$use_server=1; #or 0
#-----------------------options---------------------------
#$default_search_type = "fat"; #or thin
#$default_case_sensitive = 0; #or 1
#$the_guru = 'username@address';
#########################################################
#########################################################
#
# Nothing to configure below here
#
#########################################################
# ---- fixed defaults ------------------------------------------------------
$default_max_hits = 20;
$glimpse_delim    = ": ";

# ---- helper library and request state ------------------------------------
# www_lib.pl is loaded at run time; %FIELDS and $my_url are filled from the
# two helper calls below (presumably defined by that library -- confirm).
require "www_lib.pl";
%FIELDS = &GET_FIELDS();
$my_url = &get_this_URL();    #this may fail on cern httpd

# ---- index locations, built from the configured database root and name ---
$fat_db_dir  = "$glimpse_dbs/$db_name/fat";
$thin_db_dir = "$glimpse_dbs/$db_name/thin";

# ---- names handed to the form helpers for each control of the query form -
$input_field  = "input_field";
$result_field = "result_field";
$match_field  = "match_field";
$word_field   = "word_field";
$error_field  = "error_field";
$case_field   = "case_field";

# Bare mention only: no value is assigned here.
@result_set;

# Credit text; kept as one literal so its line breaks stay exactly as written.
$kudos="
This searching capability was brought to you thanks to
the Glimpse
full text search engine.
Please send problem reports or enhancement requests to
the glimpse guru.
";

$this_is_a_subset = 0;

# ---- choices offered by the query form -----------------------------------
# For the two hashes the keys are the labels listed in the form.
%word_options = (
    "Match whole words"     => "whole",
    "Allow fuzzy searching" => "partial",
);
@case_options  = qw(Yes No);
@match_options = (qw(10 20 30 40), "all matches");
%result_options = (
    "Just the number of matches" => "thin",
    "with contextual text"       => "fat",
);
@error_options = (0, 1, 3, 5, 8);

# Bare mention only: no value is assigned here.
%TITLE_LIST;

$| = 1;    #no buffering
#########################################################
#
#########################################################
sub show_query_form
{
local (@keys);
&PRINT_HEADER ("Search the $title");
@keys = keys %FIELDS;
if (@keys)
{
&h2 ("Search Expression - You didnt enter an expression");
}
else
{
&h2 ("Search Expression");
}
#-------------------------------------------------------------
&form ($my_url);
&nobr();
print "Enter the search expression ";
print "";
&_nobr();
&h3("options");
&ul;
&li("");
&gen_labelled_select (
"Result type",
$result_field,
keys (%result_options));
&li("");
&gen_labelled_select (
"number of matches",
$match_field,
@match_options);
&li("");
&gen_labelled_select (
"Search Type",
$word_field,
keys (%word_options));
&li("");
&gen_labelled_select (
"match case?",
$case_field,
@case_options);
&li(""); #too slow!!!
&gen_labelled_select (
"errors allowed",
$error_field,
@error_options);
&_ul;
$_form;
print "
";
#-------------------------------------------------------------
&h2 ("Notes");
print "The search engine supports regular expressions";
&dl;
&dt;
&bold;
print "Special characters";
&_bold;
ⅆ
print "
The following characters are reserved to the search engine. They should
be escaped by preceeding with a back-slash if you wish to
search for them.
^ \$ * [ ] | ( ) ! \ ; , # < > - .
^ matches the beginning of a line \$ matches the end of a line . matches any single character \# matches any number of characters * matches any number of the previous character";
&p;
&dt;
&bold;
print "Sets";
&_bold;
ⅆ
print "
a set of characters inside [] matches any of the characters
in that set.
[a-ho-z]
is any character between a and h or between o and z.
[^i-n]
matches any character in the character set except characters
'i' to 'n'.
";
&p;
&dt;
&bold;
print "Complex operations";
&_bold;
ⅆ
print "
You can contruct boolean expressions using \"AND\" and
\"OR\". Complex expressions can be built by surrounding
patterns with curly brackets {}.
'{political OR computer} AND science
will match 'political science' or 'computer science'.";
&p;
&dt;
&bold;
print "exact matches";
&_bold;
ⅆ
print "
the default behaviour is to allow mistakes in the words being
searched for. Surrounding an expression in angle brackets < >
forces an exact match on that part of the expression.
<mathemat>ics
matches mathematical with one or more errors allowed
mathe<matics>
does not match mathematical no matter how many errors are allowed.