#!/usr/bin/perl -w
#
# Build a word index ("search_index.db") for the .html and .txt files found
# under $WEB.
#
# Database layout (a single DBM hash):
#   "-<N>"   => "<url><><title><><last modified>"   record for file number N
#   "<word>" => "-<N1>-<N2>..."                     numbers of files holding <word>
#
use strict;
use warnings;
use File::Find;
use DB_File;

$|++;    # unbuffered STDOUT so the progress lines show up immediately

my $WEB  = "/home/user/public_html/";
my $HOST = "http://www.easyya.com/";
# $HOST and $WEB must use the same trailing-slash convention, because the
# leading $WEB part of every path is replaced by $HOST to form the URL.
###############################################################

my %db;

# File numbers start at 1 so that every file-record key ("-1", "-2", ...)
# begins with "-".  With 0 the key would be plain "0", which collides with the
# index entry of the word "0" (words match \w+ and may be all digits).
my $fileno = 1;

# Always rebuild the index from scratch.
unlink("search_index.db");
dbmopen(%db, "search_index.db", 0644) or die "dbmopen: $!";

finddepth(\&wanted, $WEB);

untie %db;

# File::Find callback: index one file.
#
# Called with $_ set to the file's base name, $File::Find::name to its full
# path and $File::Find::dir to its directory.  Skips anything inside a
# directory whose path contains "usage", anything that is not *.html / *.txt,
# and symbolic links.  For every accepted file it stores the file record and
# appends the file number to the entry of each distinct word in the file.
sub wanted {
    return if $File::Find::dir =~ /usage/i;
    return unless /\.(html|txt)$/;

    my $filename = $File::Find::name;
    return if -l $filename;

    print "indexing No.$fileno $filename\n";

    my $mtime = (stat $_)[9];
    unless (defined $mtime) {
        warn "skipping $filename: stat failed: $!\n";
        return;
    }

    # scalar localtime gives e.g. "Thu Sep  9 12:00:00 2004"; keep
    # "Sep 9, 2004".
    my (undef, $month, $day, undef, $year) =
        split /\s+/, scalar localtime($mtime);
    my $last_mod = "$month $day, $year";

    my $parser = MyParser->new;
    unless ($parser->parse_file($filename)) {
        # parse_file returns undef when the file cannot be opened.
        warn "skipping $filename: cannot parse: $!\n";
        return;
    }

    # Store link and title of the file in the database.  Fall back to the
    # base name when the document has no title, or an empty/blank one.
    my $title = $parser->{title};
    $title = $_ if !defined $title or $title =~ /^\s*$/;

    # Turn the filesystem path into a URL.  \Q...\E keeps regex
    # metacharacters in $WEB (such as ".") literal, and \A restricts the
    # replacement to the leading directory prefix.
    $filename =~ s/\A\Q$WEB\E/$HOST/;
    $db{-$fileno} = "$filename<>$title<>$last_mod";

    # Extract the words of the file and record the file number once per
    # distinct word.  Words are stored exactly as they appear in the text
    # (case is preserved).
    my $text = defined $parser->{TEXT} ? $parser->{TEXT} : '';
    my %seen;
    foreach my $word (grep { !$seen{$_}++ } $text =~ /\w+/g) {
        $db{$word} .= "-$fileno";
    }

    $fileno++;
    return;
}

# Minimal HTML::Parser subclass that collects the document title in
# $self->{title} and all text (title included) in $self->{TEXT}.
# $self->{TITLE} is a flag that is true while inside <title>...</title>.
BEGIN {
    package MyParser;
    require HTML::Parser;
    @MyParser::ISA = qw(HTML::Parser);

    # Start-tag handler: note when the <title> element opens.
    sub start {
        my ($self, $tag) = @_;
        $self->{TITLE} = "defined" if $tag eq 'title';
    }

    # End-tag handler: clear the flag when the <title> element closes.
    sub end {
        my ($self, $tag) = @_;
        undef $self->{TITLE} if $tag eq 'title' && $self->{TITLE};
    }

    # Text handler.  The parser may deliver one run of text in several
    # pieces, so the title is appended to rather than overwritten.
    sub text {
        my ($self, $text) = @_;
        $self->{title} .= $text if $self->{TITLE};
        $self->{TEXT}  .= $text;
    }
}

1;
__END__