# Blosxom Plugin: storyfilter
# Author: Mike Downey
# Version: 0.39

package storyfilter; # Needs to be loaded after the 'meta' plugin

# usage: add keyword=xyz to the query string to display any stories
# which contain that keyword in the meta-keywords variable.
# Keywords are saved to a disk cache as and when they are viewed.
# if $categoryiskeyword == 1 then /keyword works as well (read the notes first)
# ?refresh=y refreshes keyword cache for any currently displayed stories
# (same as using $refresh = 1)

# using /category stops $breadcrumbs and $menu from showing the proper subcategories
# because of the way it clears the $path variable

# -------- Configure variables --------

# Full path of the on-disk keyword cache. The file holds one line per story
# in the form "storyfile#keyword,keyword,...".
my $keywordfile = "$blosxom::plugin_state_dir/keyword.cache";

# When true, a request path made of a single word (no "/" and no ".") is
# taken as the keyword to filter on, and keyword links are built as
# /keyword rather than ?keyword=keyword.
my $categoryiskeyword = 1;	# allows /keyword as well as ?keyword=
# When true, every category (directory) in a story's path is added to that
# story's keywords.
my $autokeywords = 1;		# picks keywords from categories
# When true, the cached keywords of each displayed story are overwritten
# with the current ones; ?refresh=y switches this on for one request.
my $refresh = 0;		# refreshes any entries as they are displayed
# When true, ?reindex=y triggers the same full cache rebuild as ?refresh=full.
my $reindex = 1;		# does a full refresh on ?reindex=y as well

# -------------------------------------

use CGI qw/:standard/;
use Fcntl qw/:flock/;

# -------- Plugin state --------
# Each variable is declared on its own line: "my %a, $b;" only makes %a
# lexical, so the variables that must be package globals are now declared
# as such on purpose (with "our") instead of by accident.

# Private to this file.
my %keyword_stories;		# cache: full story file name => "kw1,kw2,..."
my $filechanged = 0;		# set when the cache must be written back in last()
my $localkeywords;		# Keywords for current story

# Package variables (reachable as $storyfilter::name from outside this file).
our $keywordlink;		# link prefix, "/" or "?keyword=", chosen in start()
our $keywordfilter = "";	# keyword the current request filters on
our $keywordlinks;		# HTML keyword links of the current story
our $allkeywords = "";		# HTML list of every keyword with its frequency


# Plugin 'start' hook: loads the keyword cache and works out which keyword
# (if any) this request filters on.
# Takes no arguments that it uses. Side effects: fills %keyword_stories from
# $keywordfile, sets $keywordfilter, $keywordlink and $refresh, and may empty
# $blosxom::path_info. Always returns 1.
sub start {
	# Read in the keyword cache. A missing or unreadable file is not an
	# error: the cache simply starts out empty.
	if (open(my $cache, '<', $keywordfile)) {
		flock($cache, LOCK_SH);
		while (my $line = <$cache>) {
			chomp $line;	# keep the line ending out of the stored keywords
			# limit of 2: only the first "#" separates file name and keywords
			my ($fn, $kw) = split(/#/, $line, 2);
			next unless defined $kw && length $fn;	# blank or malformed line
			$keyword_stories{$fn} = $kw;
		}
		close $cache;
	}

	# get the keyword to filter on from CGI
	$keywordfilter = CGI::param("keyword");

	# if the path is a single word (no subcategories), doesn't contain a . (ie not a story)
	# and no keyword is selected then use that as a keyword
	# This bit breaks the 'menu' plugin and stops it showing subcategories
	if ($categoryiskeyword && ($blosxom::path_info !~ m#/|\.#) && !$keywordfilter) {
		$keywordfilter = $blosxom::path_info;
		$blosxom::path_info = ""; 		# Naughty! these variables should be read-only!
	}

	# use /keyword or ?keyword= as appropriate
	$keywordlink = $categoryiskeyword ? "/" : "?keyword=";

	# decides whether we refresh the cache for visible stories
	my $refresh_param = CGI::param("refresh");
	$refresh ||= (defined $refresh_param && $refresh_param eq "y");

	1;
}

# Plugin 'filter' hook: optionally rebuilds the whole keyword cache, then
# removes every story that does not carry the requested keyword.
#   $pkg       - first argument passed by the caller (unused)
#   $files_ref - reference to a hash keyed by full story file name;
#                non-matching entries are deleted from it in place
# A full rebuild happens for ?refresh=full, for ?reindex=y when $reindex is
# set, or when the cache file does not exist yet. Always returns 1.
sub filter {
	my ($pkg, $files_ref) = @_;

	my $refresh_param = CGI::param("refresh");
	my $reindex_param = CGI::param("reindex");
	my $want_full    = defined $refresh_param && $refresh_param eq "full";
	my $want_reindex = defined $reindex_param && $reindex_param eq "y";

	# Rebuild entire cache file if required
	if ($want_full or !(-e $keywordfile) or ($want_reindex and $reindex)) {
		%keyword_stories = ();	# empty list; "= {}" would add a bogus key
		foreach my $storyfn (keys %$files_ref) {
			my $keywordlist = "";

			if ($autokeywords) {
				# strip the leading data directory (taken literally, not as
				# a regex) so only /category/.../story.txt is left
				(my $storypath = $storyfn) =~ s/^\Q$blosxom::datadir\E//;
				my @pathelements = split("/", $storypath);
				pop @pathelements;	# drop the story's own file name
				# and add each part of the path to the keywords
				$keywordlist .= "$_," foreach @pathelements;
			}
			$keywordlist .= _meta_keywords_of($storyfn);
			$keywordlist =~ s/\s//g;	# strip whitespace
			$keyword_stories{$storyfn} = $keywordlist;
		}
		$filechanged = 1;	# re-write file at end
	}

	if ($keywordfilter) {
		# If we want to filter on a keyword, go ahead and do it.
		# Matches the keyword at the start of the list or right after a
		# comma. This stops, eg. 'work' finding 'network' but 'review'
		# still finds 'reviews'. \Q..\E makes the keyword, which comes from
		# the request, match literally instead of being run as a regex.
		foreach my $storyfn (keys %$files_ref) {
			my $keywords = $keyword_stories{$storyfn};
			$keywords = "" unless defined $keywords;
			delete $files_ref->{$storyfn}
				unless $keywords =~ /(?:^|,)\Q$keywordfilter\E/i;
		}
	}
	1;
}

# Returns the text of every "meta-keywords:" line found in the meta block of
# the story file $storyfn (the "meta-" lines directly after the title line),
# or an empty string when there is none or the file cannot be opened.
sub _meta_keywords_of {
	my ($storyfn) = @_;
	my $keywords = "";

	open(my $story, '<', $storyfn) or return $keywords;
	my $line = <$story>;	# skip the first line (story title)
	while (defined($line = <$story>)) {
		chomp $line;
		last if substr($line, 0, 5) ne "meta-";	# end of the meta block
		$keywords .= $1 if $line =~ /^meta-keywords:\s*(.*)$/;
	}
	close $story;
	return $keywords;
}

# Plugin 'story' hook, run for each displayed story: works out the story's
# keywords, adds them to the cache array and builds the HTML shown by the
# templates.
#   $pkg                              - first argument from the caller (unused)
#   $path, $filename                  - category path and base name of the
#                                       story; the cache key is
#                                       "$blosxom::datadir$path/$filename.txt"
#   $story_ref, $title_ref, $body_ref - accepted but not used here
# Sets $keywordlinks (empty when the story has no keywords) and, when
# $meta::showkeywords is defined, $allkeywords. Always returns 1.
sub story {
	my($pkg, $path, $filename, $story_ref, $title_ref, $body_ref) = @_;

	# Start from the story's own meta keywords, whatever its category, and
	# add the path elements when $autokeywords is on.
	$localkeywords = defined $meta::keywords ? $meta::keywords : "";
	$localkeywords .= $blosxom::path if $blosxom::path && $autokeywords;
	$localkeywords =~ s#/#,#g;	# /cat/sub becomes ,cat,sub
	$localkeywords =~ s/\s//g;	# strip cr, lf and spaces

	# Add the keywords to the cache array (only if we need to): either the
	# story has no cached keywords yet or a refresh was asked for. The cache
	# is only marked as changed when the stored value really differs.
	my $storyfilename = "$blosxom::datadir$path/$filename.txt";
	my $cached = $keyword_stories{$storyfilename};
	if (!$cached or $refresh) {
		if ((defined $cached ? $cached : "") ne $localkeywords) {
			$keyword_stories{$storyfilename} = $localkeywords;
			$filechanged = 1;
		}
	}

	# Build up the keyword links: one per keyword, in the order they were
	# listed, ignoring blanks and case-insensitive repeats.
	my (@links, %seen);
	foreach my $word (split(",", $localkeywords)) {
		next unless $word;		# ignore blank entries
		next if $seen{lc $word}++;
		push @links, makelink($word);
	}
	# Always assigned, so a story without keywords does not show the links
	# of the story before it.
	$keywordlinks = @links ? "Keywords: ".join("&nbsp;", @links) : "";

	$localkeywords="";	# Need to reset this otherwise keywords build cumulatively down the page

	if (defined $meta::showkeywords){	# Build list of all keywords and frequencies eg. for site map
		my %keywordfrequency;
		foreach my $fn (keys %keyword_stories) {
			my $list = $keyword_stories{$fn};
			next unless defined $list;
			foreach my $kw (split(",", $list)) {
				$kw =~ s/\s//g;	# strip all whitespace
				$keywordfrequency{lc $kw}++ if $kw;
			}
		}

		# Rebuilt from scratch on every call; appending would repeat the
		# whole list once per story.
		$allkeywords = join("",
			map { makelink($_)." ($keywordfrequency{$_})<br>\n" }
			sort keys %keywordfrequency);
	}

	1;
}

# Builds the HTML link for one keyword.
#   $word - the keyword exactly as it is to appear in the URL
# Returns an <a> element whose href is $blosxom::url followed by
# $keywordlink ("/" or "?keyword=") and the keyword. The visible text is the
# keyword with "_" and "-" shown as spaces and its first letter capitalised.
# Declared without a prototype: the former empty prototype "()" declared a
# sub that takes no arguments, although every caller passes the keyword.
sub makelink {
	my ($word) = @_;

	(my $displayword = $word) =~ s/[_-]/ /g;	# convert -_ to space
	return "<a href=\"$blosxom::url$keywordlink$word\">" . ucfirst($displayword) . "</a>";
}

# If the cache file has changed, write it to disk.
# Plugin 'last' hook. Writes one "storyfile#keywords" line per story that
# has keywords, sorted by file name. Does nothing unless $filechanged is
# set. A cache file that cannot be opened is reported with warn() and
# otherwise ignored. Always returns 1.
sub last {
	return 1 unless $filechanged;

	# Open for append so that nothing is truncated before we hold the
	# exclusive lock; opening with ">" would empty the file while another
	# process may still be reading it.
	my $cache;
	unless (open($cache, '>>', $keywordfile)) {
		warn "storyfilter: cannot write $keywordfile: $!\n";
		return 1;
	}
	flock($cache, LOCK_EX);
	seek($cache, 0, 0);
	truncate($cache, 0);

	foreach my $fn (sort keys %keyword_stories) {
		my $kw = $keyword_stories{$fn};
		next unless $kw;	# stories without keywords are not stored
		$kw =~ s/\s+$//;	# a stray line ending would split the entry
		print {$cache} "$fn#$kw\n" if length $kw;
	}

	close($cache) or warn "storyfilter: error closing $keywordfile: $!\n";
	1;
}

1;

__END__

=head1 NAME

Blosxom plugin: storyfilter

=head1 SYNOPSIS

Filters stories according to a keyword provided on the URL.

=head1 VERSION

V0.39 (17 September 2006)

=head1 AUTHOR

Mike Downey (www.mikedowney.co.uk)

=head1 BUGS

Setting the $categoryiskeyword variable to 1 will break any plugins which make
use of the $blosxom::path_info variable (which is quite a lot of them).

=head1 USAGE

=head2 Installation

Simply copy into the plugins directory. If you want the plugin to pick the keywords
from new stories as they are posted (rather than refresh the entire keyword cache
each time), then the meta plugin is required as well.

In each story, add the line I<meta-keywords:> followed by comma separated keyword list to
the beginning of the story, eg.

meta-keywords: food,drink,sandwiches

=head2 Configuration

$categoryiskeyword = 1 :
If the category/directory to be viewed is only one level deep
i.e. no subdirectories, then the directory name will be taken
as the keyword, eg. blosxom.cgi/coventry will be the same as
blosxom.cgi?keyword=coventry. This requires '$autokeywords=1'
to preserve the normal category view.

$autokeywords = 1 :
The plugin will take the path to the story and add the categories
to the keywords, eg. 'blosxom.cgi/days_out/beach' will cause
'days_out' and 'beach' to be added to the keywords for the stories
in that directory.

$refresh = 1 :
Every time a story is viewed, the keywords are written to the cache file.

$refresh = 0 :
Only writes a story's keywords to the cache file the first time that story
is viewed. If the keywords are later changed in the story, the changes will
not be saved until the cache is refreshed.

=head1 NOTES


The filter gathers keyword information from the stories and allows the user
to selectively view stories according to keyword. The keywords are defined
by adding
	meta-keywords: comma separated keyword list
The 'storyfilter' plugin uses the 'meta' plugin to convert the list into
a variable which it can access.
To view stories according to keyword, append keyword=theword to the URL,
	e.g blosxom.cgi?keyword=coventry
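will view only stories which use the keyword coventry. (Note: keywords are
matched case-insensitively and from the start of each keyword, so 'review'
also finds 'reviews' but 'work' does not find 'network'.)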

The plugin will now build the keywords cache file in its entirety if it
doesn't already exist. A forced rebuild may be done by using ?refresh=full
at the end of the url, or ?reindex=y when $reindex is set to 1.

The $categoryiskeyword should be treated as experimental. It hasn't been
fully implemented or tested and may have some strange behaviour or effect
on other filters. One such problem is with the menu
plugin. When viewing a category which is one level from the top, it causes
the top level menu to be displayed rather than the subcategories of the
current view.
