#!/usr/bin/perl
#
#           RiLax 0.1.03
#
# plain text database search engine, 
# version 0.1
# (c) Sergej Tarasov, 2001
#
# Homepage: http://risearch.org/
# email: risearch@risearch.org
# Last modified: 17.02.2002



#===================================================================
#
#         Set variables below 
#
#===================================================================

# Database filename
$DB_NAME = 'filename';

# Record separator in your DB ("\n" for newline).
$record_separator = "\n";

# Field separator.
# Several punctuation characters in this value are backslash-escaped
# further down in this file (after the configuration section), so write
# the separator here literally, without escaping it yourself.
$field_separator  = "::";

# List here the numbers of the fields which should be indexed.
# Field numbering starts from 0.
# NOTE: the variable name really is spelled "index_fileds"; do not correct
# it here - presumably other scripts refer to it by this exact name.
@index_fileds     = qw(0 1 2 3);

# Site size
# 1 - Tiny    ~1Mb
# 2 - Medium  ~10Mb
# 3 - Big     ~50Mb
# 4 - Large   >100Mb
# This value selects $HASHSIZE below: 1 => 20001, 3 => 100001, 4 => 300001,
# and any other value (including 2) => 50001.
$site_size = 2;

# Write queries to a log file ("YES" or "NO").
# Please note: you have to create the directory "log" by hand;
# the script will not check whether that directory exists.
# Please also edit the parameters in the file "stat.pl".
$create_log = "YES";

# Paths to the index database files
$HASH      = "db/0_hash";
$HASHWORDS = "db/0_hashwords";
$SITEWORDS = "db/0_sitewords";
$WORD_IND  = "db/0_word_ind";

#===================================================================
#
#     All other variables are optional. The script should work fine
#  with the default settings.
#     These variables control the indexing process.
#
#===================================================================

# Minimum word length to index
$min_length = 3;

# Whether to index numbers (set   $numbers = ""   if you don't want to
# index numbers).
# You may add other non-letter characters here which you want to index.
# NOTE(review): the value looks like a character-class fragment ('0-9'),
# so it is presumably interpolated into a [...] class by the indexer -
# confirm before adding characters that are special inside a class.
$numbers = '0-9';

# Indexing scheme
# Whole word - 1
# Beginning of the word - 2
# Every substring - 3
$INDEXING_SCHEME = 2;

# Translate escape chars (like &Egrave; or &#255;) ("YES" or "NO").
# When set to "YES", html_esc() is called below to fill %html_esc.
$use_esc = "YES";

# Use the list of stop words ("YES" or "NO").
# When set to "YES", every word of @stop_words becomes a key of the
# %stop_words hash built below.
$use_stop_words = "YES";
@stop_words = qw(
and any are but can had has have her here him his
how its not our out per she some than that the their them then there
these they was were what you
);

#===================================================================
#
#     These variables control the script output.
#
#===================================================================

# Number of search results shown per page
$res_num=10;


#===================================================================
#
#   Change below only if you need multilanguage support
#   With default settings script will work with
#   English, Russian (win1251 encoding) and most European languages
#
#===================================================================

# Capital letters.
# The value is single-quoted, so the \xNN sequences stay as literal text
# (a range 0xC0-0xDF plus the single code 0xA8) to be interpreted wherever
# the string is later used.
# NOTE(review): in win1251 these codes are the Cyrillic capitals; in
# Latin-1 the range 0xC0-0xDF is mostly accented capitals - confirm
# against the encoding of your data.
$CAP_LETTERS = '\xC0-\xDF\xA8';

# Lower case letters (range 0xE0-0xFF plus the single code 0xB8),
# written in the same form as $CAP_LETTERS.
$LOW_LETTERS = '\xE0-\xFF\xB8';

# If you use Unicode characters in your site in the form
# &#NNNN; (where NNNN>255), uncomment and edit the entries below.
# Samples for different languages can be found in the file unicode.txt.
# Keys are decimal character codes; values are the replacement characters.
# esc2char() consults this table for numeric references above 255.

%code2char = (
# 1040 => "",
# 1041 => "",
# 1042 => "",
# 1043 => "",
);

#===================================================================
#
#            --- end of configuration --- 
#
# Please do not edit below this line unless you know what you are doing
#
#===================================================================

# Choose the hash table size from the configured site size.
# Sizes 1, 3 and 4 have dedicated values; everything else (including the
# default size 2) falls back to 50001.
{
    my %size_for = (
        1 => 20001,
        3 => 100001,
        4 => 300001,
    );
    # "+ 0" forces the same numeric comparison the == operator would do,
    # so values such as "3.0" still select the entry for 3.
    my $wanted = $site_size + 0;
    $HASHSIZE = exists $size_for{$wanted} ? $size_for{$wanted} : 50001;
}

# Backslash-escape regular-expression metacharacters in the field
# separator, presumably so that it can be interpolated into a pattern
# (e.g. by split) elsewhere. The set covers \ | ( ) { } [ ] * ? as well as
# . + ^ and $, so separators such as "." or "+" are matched literally too.
# Other characters (for example the default "::") are left unchanged.
$field_separator =~ s/([\\|(){}\[\]*?+.^\$])/\\$1/g;

#===================================================================

# Compute the hash bucket number for a string.
#
# Each unsigned char value of $key is folded into an accumulator using the
# steps of the classic PJW/ELF string hash (shift left by four, add the
# value, fold the top nibble selected by the mask 0xF0000000 back in and
# clear it); the result is then reduced modulo the package variable
# $HASHSIZE.
#
# Parameters:
#   $key - string to hash.
# Returns:
#   an integer in the range 0 .. $HASHSIZE-1 (0 for an empty string).
#
# $HASHSIZE must be set to a non-zero value before this sub is called.
#
# NOTE(review): the arithmetic is deliberately left exactly as it was -
# presumably existing index files depend on these values - confirm before
# changing anything in the loop.
sub hash {
    my ($key) = @_;
    my $h = 0x00000000;
    my $f = 0xF0000000;
    foreach my $byte (unpack("C*", $key)) {
        $h = ($h << 4) + $byte;
        # $g is lexical so that calling hash() does not overwrite a
        # package-level $g belonging to the caller.
        my $g = $h & $f;
        if ($g) { $h ^= $g >> 24 }
        $h &= ~$g;
    }
    return $h % $HASHSIZE;
}
#===================================================================

# Build the stop-word lookup table: when stop words are enabled, every
# entry of @stop_words becomes a key of %stop_words with an empty-string
# value; otherwise the table stays empty.
%stop_words = ();
@stop_words{@stop_words} = ("") x @stop_words if $use_stop_words eq "YES";
#=====================================================================

# Load the entity table only when escape translation is switched on.
html_esc() if $use_esc eq "YES";

# Fill the package hash %html_esc, which maps named HTML character
# entities (written in full, e.g. "&eacute;") to replacement characters.
#
# The entities for codes 192..255 map to chr() of that code. "&nbsp;",
# "&amp;" and the double-quote entity map to a single space rather than to
# their literal characters - presumably so that they act as word
# separators during indexing.
#
# Takes no arguments. Any previous contents of %html_esc are replaced.
sub html_esc {
    %html_esc = (
        "&Agrave;" => chr(192),
        "&Aacute;" => chr(193),
        "&Acirc;" => chr(194),
        "&Atilde;" => chr(195),
        "&Auml;" => chr(196),
        "&Aring;" => chr(197),
        "&AElig;" => chr(198),
        "&Ccedil;" => chr(199),
        "&Egrave;" => chr(200),
        "&Eacute;" => chr(201),
        "&Ecirc;" => chr(202),
        "&Euml;" => chr(203),
        "&Igrave;" => chr(204),
        "&Iacute;" => chr(205),
        "&Icirc;" => chr(206),
        "&Iuml;" => chr(207),
        "&ETH;" => chr(208),
        "&Ntilde;" => chr(209),
        "&Ograve;" => chr(210),
        "&Oacute;" => chr(211),
        "&Ocirc;" => chr(212),
        "&Otilde;" => chr(213),
        "&Ouml;" => chr(214),
        "&times;" => chr(215),
        "&Oslash;" => chr(216),
        "&Ugrave;" => chr(217),
        "&Uacute;" => chr(218),
        "&Ucirc;" => chr(219),
        "&Uuml;" => chr(220),
        "&Yacute;" => chr(221),
        "&THORN;" => chr(222),
        "&szlig;" => chr(223),
        "&agrave;" => chr(224),
        "&aacute;" => chr(225),
        "&acirc;" => chr(226),
        "&atilde;" => chr(227),
        "&auml;" => chr(228),
        "&aring;" => chr(229),
        "&aelig;" => chr(230),
        "&ccedil;" => chr(231),
        "&egrave;" => chr(232),
        "&eacute;" => chr(233),
        "&ecirc;" => chr(234),
        "&euml;" => chr(235),
        "&igrave;" => chr(236),
        "&iacute;" => chr(237),
        "&icirc;" => chr(238),
        "&iuml;" => chr(239),
        "&eth;" => chr(240),
        "&ntilde;" => chr(241),
        "&ograve;" => chr(242),
        "&oacute;" => chr(243),
        "&ocirc;" => chr(244),
        "&otilde;" => chr(245),
        "&ouml;" => chr(246),
        "&divide;" => chr(247),
        "&oslash;" => chr(248),
        "&ugrave;" => chr(249),
        "&uacute;" => chr(250),
        "&ucirc;" => chr(251),
        "&uuml;" => chr(252),
        "&yacute;" => chr(253),
        "&thorn;" => chr(254),
        "&yuml;" => chr(255),
        "&nbsp;" => " ",
        "&amp;" => " ",
        "&quot;" => " ",
        # Non-standard spelling, kept so that data relying on it still works.
        "&quote;" => " ",
    );

}
#=====================================================================

# Translate one HTML character reference into its replacement character.
#
# Three forms are recognised, tried in this order:
#   &name;    - named entity, looked up in %html_esc
#   &#NNN;    - decimal reference
#   &#xHHH;   - hexadecimal reference (the "x" may be upper or lower case)
# Numeric codes up to 255 are converted with chr(); larger codes are looked
# up in %code2char, whose keys are decimal numbers.
#
# Parameters:
#   $esc - text containing the reference, e.g. "&eacute;" or "&#233;".
# Returns:
#   the replacement character, or "" when the reference is unknown,
#   malformed (e.g. "&#;") or has no entry in the lookup tables.
sub esc2char {
    my ($esc) = @_;
    my $char;
    if ($esc =~ /(&[a-zA-Z]+;)/) {
        # Look up the matched entity itself, not the whole argument.
        $char = $html_esc{$1};
    } elsif ($esc =~ /&#([0-9]+);/) {
        # "+ 0" strips leading zeros so "&#01040;" finds the key 1040.
        my $code = $1 + 0;
        if ($code <= 255) { $char = chr($code) }
        else { $char = $code2char{$code} }
    } elsif ($esc =~ /&#x([0-9a-fA-F]+);/i) {
        my $code = hex($1);
        if ($code <= 255) { $char = chr($code) }
        else { $char = $code2char{$code} }
    }
    # Failed table lookups yield undef; always hand back a string.
    return defined $char ? $char : "";
}
#=====================================================================



1;