Created
September 5, 2012 19:10
-
-
Save anonymous/3642781 to your computer and use it in GitHub Desktop.
KinoSearch Example
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package MyApp::Web::Controller::Search;
use 5.010;
use Moose;
use namespace::autoclean;

# extends runs inside BEGIN so the parent class is in place before the
# action attributes on the subs below are compiled.
BEGIN { extends 'Catalyst::Controller'; }

# Search action (declared with Path and Args(0)).
#
# Reads the "q" request parameter, hands it to the Search model and stashes
# the returned result structure for the main/search.mc template.
sub index : Path : Args(0) {
    my ( $self, $c ) = @_;

    my $search_model = $c->model('Search');

    # A missing "q" parameter is searched for as the empty string.
    my $terms = $c->req->param('q') // '';
    my $hits  = $search_model->search($terms);

    $c->log->debug(
        sprintf 'Search matched %s items', $hits->{total_hits}
    );

    $c->stash(
        meta_title => 'Search',
        template   => 'main/search.mc',
        hits       => $hits,
    );
}

__PACKAGE__->meta->make_immutable;
1;
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env perl | |
# ABSTRACT: Document indexer for search engine | |
use v5.10; | |
use strict; | |
use warnings; | |
use FindBin::libs; | |
use Config::JFDI; | |
use KSx::Simple; | |
use HTML::Strip; | |
use Path::Class; | |
use MyApp::Schema::CMS; | |
# Rebuild the search index from scratch: every visible CMS page is reduced
# to plain text and added as one document with title, url and content fields.
my $config = Config::JFDI->new( name => 'myapp_web' )->get;

# Get CMS schema info from config
my $schema =
  MyApp::Schema::CMS->connect( $config->{'Model::CMSdb'}->{connect_info} );
my $pages = $schema->resultset('Page');

# Get indexer info from the config
my $path_to_index = dir( $config->{'Model::Search'}->{index} );
say "Index is: $path_to_index";

# Any existing index directory is thrown away and recreated empty.
$path_to_index->rmtree if -e $path_to_index;
$path_to_index->mkpath;

# Create the analyzer and the Inverted Indexer
my $index = KSx::Simple->new(
    path     => $path_to_index,
    language => 'en',
);

# Populate the Inverted Index from all visible pages in the CMS
my $stripper = HTML::Strip->new(
    decode_entities => 1,
    emit_spaces     => 1,
    striptags       => [qw(title style script applet sup form)],
);

my $pages_rs = $pages->search( { visible => 1 } );
while ( my $page = $pages_rs->next ) {
    my $title    = $page->section('title')->live->html;
    my $contents = $page->section('content')->live->html;
    my $url      = "/page/" . $page->url;

    # Strip out any tags.  The stripper keeps parser state between parse()
    # calls, so eof() is called after each fragment; otherwise an unclosed
    # tag in one fragment would affect how the next one is stripped.
    $title = trim( $stripper->parse($title) );
    $stripper->eof;
    $contents = trim( $stripper->parse($contents) );
    $stripper->eof;

    # Report what we are doing: URL, title and a one-line content preview
    # (first 70 characters with whitespace runs collapsed).
    my $preview = substr( $contents, 0, 70 );
    $preview =~ s/\s+/ /g;

    say "Add: $url";
    say " => $title";
    say " -> ", $preview, "...";

    $index->add_doc({
        title   => $title,
        url     => $url,
        content => $contents,
    });
}

# Release the indexer here so its destructor runs at a known point rather
# than in unspecified order during global destruction.
undef $index;
# Return a copy of the given string with whitespace removed from both ends.
# Interior whitespace is left alone and the caller's variable is not modified.
sub trim {
    my ($text) = @_;

    $text =~ s{^\s+}{};    # leading run
    $text =~ s{\s+$}{};    # trailing run

    return $text;
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package MyApp::Web::Model::Search;
use Moose;
use namespace::autoclean;
extends 'Catalyst::Model';

use KSx::Simple;
use KinoSearch::Highlight::Highlighter;

# Path of the search index, passed straight to KSx::Simple.
has 'index' => ( is => 'ro', isa => 'Str' );

# KSx::Simple handle on the index, built on first use.
has '_index' => (
    is      => 'ro',
    isa     => 'KSx::Simple',
    lazy    => 1,
    default => sub {
        my ($model) = @_;
        return KSx::Simple->new(
            path     => $model->index,
            language => 'en',
        );
    },
);

# Run a query against the index.
#
#   $q              query string
#   $offset         first hit to return (default 0)
#   $hits_per_page  maximum number of hits to return (default 100)
#
# Returns a hashref:
#   total_hits => value returned by the index's search() call
#   hits       => arrayref of { excerpt, url, score, title } hashrefs
sub search {
    my ( $self, $q, $offset, $hits_per_page ) = @_;

    $offset = 0 unless defined $offset;

    # TODO: get from config
    $hits_per_page = 100 unless defined $hits_per_page;

    # TODO: implement paging of search results

    my $engine  = $self->_index;
    my $matched = $engine->search(
        query      => $q,
        offset     => $offset,
        num_wanted => $hits_per_page,
    );

    # The highlighter needs the searcher that KSx::Simple holds internally;
    # reaching into the object for it is not a documented interface.
    my $excerpter = KinoSearch::Highlight::Highlighter->new(
        searcher => $engine->{searcher},
        query    => $q,
        field    => 'content',
    );

    my @found;
    while ( my $doc = $engine->next ) {
        my %entry = (
            excerpt => $excerpter->create_excerpt($doc),
            url     => $doc->{url},
            score   => $doc->get_score,
            title   => $doc->{title},
        );
        push @found, \%entry;
    }

    return {
        total_hits => $matched,
        hits       => \@found,
    };
}

__PACKAGE__->meta->make_immutable;
1;
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env perl | |
# ABSTRACT: Command line search test script | |
use v5.10; | |
use strict; | |
use warnings; | |
use Config::JFDI; | |
use KSx::Simple; | |
use KinoSearch::Highlight::Highlighter; | |
use Path::Class; | |
use Data::Dump; | |
# Command line smoke test for the search index.
#
# Usage: pass the query terms as arguments; with no arguments the query
# defaults to 'the'.  Prints the hit count followed by title, URL and a
# highlighted excerpt for up to ten hits.
my $config        = Config::JFDI->new( name => 'myapp_web' );
my $path_to_index = $config->get->{'Model::Search'}->{index};
say "Index is: $path_to_index";

# Make sure the directory that contains the index exists.
file($path_to_index)->dir->mkpath;

# Open the index through the same simple interface the indexer uses.
my $index = KSx::Simple->new(
    path     => $path_to_index,
    language => 'en',
);

# All command line arguments together form the query.
my $query = @ARGV ? join( ' ', @ARGV ) : 'the';

my $total_hits = $index->search(
    query      => $query,
    offset     => 0,
    num_wanted => 10,
);

# Debugging aid: dump the index object, including its internal state.
dd $index;

# The highlighter needs the searcher held inside the KSx::Simple object;
# this reaches into the object's internals.
my $highlighter = KinoSearch::Highlight::Highlighter->new(
    searcher => $index->{searcher},
    query    => $query,
    field    => 'content',
);

# The count is printed twice on purpose: once as returned by search() and
# once as read from the hits object, so the two can be compared.
say "Total hits: $total_hits";
say "Total hits: " . $index->{hits}->total_hits;

while ( my $hit = $index->next ) {
    my $excerpt = $highlighter->create_excerpt($hit);
    say "Title: $hit->{title}";
    say "URL: $hit->{url}";
    say "Excerpt: $excerpt";
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
%# Search results component.
%# Expects $hits: a hashref whose "hits" entry is an arrayref of hashes with
%# url, title, score and excerpt keys.
<%args>
$hits
</%args>
<h2>Search Results</h2>
%# Decide on the hits actually returned rather than on the total count, so an
%# empty page of results never renders an empty list.
% if ( @{ $hits->{hits} || [] } ) {
<ul>
% for my $hit ( @{ $hits->{hits} } ) {
<li>
<a href="<% $hit->{url} %>"> <% $hit->{title} %> </a>
(score: <% sprintf "%0.2f", 100 * $hit->{score} %>) <br/>
%# NOTE(review): the excerpt is emitted with the "n" flag, presumably because
%# it carries the highlighter's own markup; confirm it is already HTML-safe.
<% $hit->{excerpt} |n %> <br/>
%# <span class="excerptURL"><% $hit->{url} %></span>
</li>
% }
</ul>
% } else {
<p>Sorry, no results were found for that search. One of the following
pages may be a good starting point:</p>
% }
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment