Skip to content

Instantly share code, notes, and snippets.

@philipsoutham
Created August 30, 2013 06:05
Show Gist options
  • Save philipsoutham/6386741 to your computer and use it in GitHub Desktop.
Save philipsoutham/6386741 to your computer and use it in GitHub Desktop.
A reference for a question sent to the mailing list.
package golucy
// Copyright 2013 Philip Southam
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
/*
#include <stdlib.h>
#include "Clownfish/Obj.h"
#define DECREF cfish_Obj_decref
#define ObjToString CFISH_Obj_To_String
#include "Lucy/Search/IndexSearcher.h"
#define LucyIndexSearcher lucy_IndexSearcher
#define LucyIxSearcherNew lucy_IxSearcher_new
#define LucyIxSearcherHits LUCY_IxSearcher_Hits
#define LucyIxSearcherGetSchema LUCY_IxSearcher_Get_Schema
#define LucyIxSearchFetchDocVec LUCY_IxSearcher_Fetch_Doc_Vec
#include "Lucy/Analysis/EasyAnalyzer.h"
#define LucyEasyAnalyzerNew lucy_EasyAnalyzer_new
#include "Lucy/Plan/Schema.h"
#define LucySchema lucy_Schema
#define LucySchemaAllFields LUCY_Schema_All_Fields
#include "Lucy/Search/QueryParser.h"
#define LucyQueryParser lucy_QueryParser
#define LucyQParserNew lucy_QParser_new
#define LucyQParserParse LUCY_QParser_Parse
#include "Lucy/Document/HitDoc.h"
#define LucyHitDoc lucy_HitDoc
#define LucyHitDocExtract LUCY_HitDoc_Extract
#define LucyHitDocGetDocId LUCY_HitDoc_Get_Doc_ID
#include "Lucy/Search/Hits.h"
#define LucyHitsTotal LUCY_Hits_Total_Hits
#define LucyHitsNext LUCY_Hits_Next
#include "Lucy/Search/Query.h"
#define LucyQuery lucy_Query
#define LucyQueryMakeCompiler LUCY_Query_Make_Compiler
#include "Lucy/Search/Compiler.h"
#define LucyCompilerHighlightSpans LUCY_Compiler_Highlight_Spans
#include "Clownfish/VArray.h"
#define VaGetSize CFISH_VA_Get_Size
#define VaFetch CFISH_VA_Fetch
#include "Clownfish/CharBuf.h"
#define CFishCharBuf cfish_CharBuf
// cfish_cb_ptr2char exposes a CharBuf's byte buffer to Go as a char*.
// It returns the result of CFISH_CB_Get_Ptr8 cast to char*; nothing is copied.
// NOTE(review): presumably the buffer stays owned by the CharBuf and may not
// be NUL-terminated -- confirm before handing the pointer to C.GoString.
extern char* cfish_cb_ptr2char(const CFishCharBuf * field) {
return (char*)CFISH_CB_Get_Ptr8(field);
}
// CB_newf is a single-argument wrapper that forwards `pattern` to
// cfish_CB_newf and returns the resulting CharBuf.
// NOTE(review): cfish_CB_newf is presumably a printf-style variadic
// constructor (which cgo cannot call directly); if so, any '%' in `pattern`
// is treated as a format directive with no matching argument -- confirm that
// callers only pass literal-safe text.
extern CFishCharBuf* CB_newf(const char* pattern) {
return cfish_CB_newf(pattern);
}
*/
import "C"
import (
"log"
"unsafe"
)
// Query pairs the original query text with the Lucy query object that
// ParseQuery produced from it.
type Query struct {
// QueryStr is the raw query text that was passed to ParseQuery.
QueryStr string
// lucySchema is the schema pointer ParseQuery read from the index searcher.
lucySchema *C.LucySchema // we're now carrying this around in 2 places :-/
// lucyQuery is the parsed query returned by the Lucy query parser.
lucyQuery *C.LucyQuery
}
// IndexReader wraps a Lucy index searcher opened on an Index.
type IndexReader struct {
// Index is the index this reader was created from (see NewIndexReader).
Index *Index
// lucySearcher is the underlying Lucy searcher; Close releases it.
lucySearcher *C.LucyIndexSearcher
}
// NewIndexReader opens a Lucy index searcher on index.Path and returns an
// IndexReader that holds both the searcher and the originating index.
// Call Close on the returned reader to release the searcher.
func (index *Index) NewIndexReader() *IndexReader {
	// The path CharBuf is only needed while the searcher is being built.
	pathBuf := cb_newf(index.Path)
	defer C.DECREF(pathBuf)

	reader := new(IndexReader)
	reader.Index = index
	reader.lucySearcher = C.LucyIxSearcherNew(pathBuf)
	return reader
}
// ParseQuery parses queryStr with a Lucy query parser built from the
// searcher's schema and returns the result wrapped in a Query.
//
// The parser currently uses an English EasyAnalyzer, "AND" as the default
// boolean operator, and every field in the schema; all three should become
// configurable. The returned Query keeps the parsed query object and the
// schema pointer read from the searcher; release it with Query.Close.
//
// Every temporary Clownfish object created here (language, analyzer, boolean
// operator, field list, parser, query string) is released before returning.
// NOTE(review): this assumes the parser constructor and Parse do not take
// ownership of their arguments -- confirm against the Lucy C API.
func (ixReader *IndexReader) ParseQuery(queryStr string) *Query {
	lucySchema := C.LucyIxSearcherGetSchema(ixReader.lucySearcher)

	language := cb_newf("en") // should be configurable
	defer C.DECREF(language)
	analyzer := C.LucyEasyAnalyzerNew(language) // should this be configurable?
	defer C.DECREF(analyzer)

	// Previously created inline as arguments and never released.
	defaultBoolOp := cb_newf("AND") // should be configurable
	defer C.DECREF(defaultBoolOp)
	fields := C.LucySchemaAllFields(lucySchema) // should be configurable
	defer C.DECREF(fields)

	qp := C.LucyQParserNew(lucySchema, analyzer, defaultBoolOp, fields)
	defer C.DECREF(qp)

	// The query text buffer is only needed for the duration of Parse.
	queryBuf := cb_new_from_utf8(queryStr)
	defer C.DECREF(queryBuf)

	return &Query{
		QueryStr:   queryStr,
		lucySchema: lucySchema,
		lucyQuery:  C.LucyQParserParse(qp, queryBuf),
	}
}
// Search runs query against the index and returns the value of `field` for
// each hit, starting at `offset` and returning at most `limit` entries.
//
// The first return value is the number of entries in the returned slice. A
// hit that has no value for `field` contributes an empty string so that
// positions still line up with the hit order.
//
// For each hit the highlight spans for `field` are fetched and logged (span
// count, then the first span if there is one); this is exploratory output.
//
// Need to add `includeMatchedTerms bool` parameter. Then figure out a
// way to extract the matched terms. Should probably have some sort
// of `Results` object/iterator so that we don't have to specify
// offset/limit and where I can attach matched terms to the result.
func (ixReader *IndexReader) Search(query *Query, offset, limit uint, field string) (uint, []string) {
	getField := cb_newf(field) // total hack, need to return more than one field
	defer C.DECREF(getField)

	hits := C.LucyIxSearcherHits(ixReader.lucySearcher, query.lucyQuery, C.uint32_t(offset), C.uint32_t(limit), nil)
	defer C.DECREF(hits)

	totalNumHits := uint(C.LucyHitsTotal(hits))
	num2Return := minUInt(limit, totalNumHits)
	// Grow as hits arrive so an early end of the iterator (e.g. because of a
	// non-zero offset) does not leave empty trailing entries.
	results := make([]string, 0, num2Return)

	compiler := C.LucyQueryMakeCompiler(query.lucyQuery, ixReader.lucySearcher, 1.0, false)
	defer C.DECREF(compiler)

	for uint(len(results)) < num2Return {
		hit := C.LucyHitsNext(hits)
		if hit == nil {
			break
		}

		docID := C.LucyHitDocGetDocId(hit)
		docVec := C.LucyIxSearchFetchDocVec(ixReader.lucySearcher, docID)
		spans := C.LucyCompilerHighlightSpans(compiler, ixReader.lucySearcher, docVec, getField)
		numSpans := C.VaGetSize(spans)
		log.Printf("%d", int(numSpans))
		if numSpans > 0 {
			// Only stringify when a first span exists; fetching from an
			// empty array and calling To_String on the result would crash.
			if span := C.VaFetch(spans, 0); span != nil {
				cbSpan := C.ObjToString(span)
				log.Println(C.GoString(cb_ptr2char(cbSpan)))
				// NOTE(review): To_String is assumed to return a new
				// reference that the caller must release -- confirm.
				C.DECREF(cbSpan)
			}
		}
		// NOTE(review): Highlight_Spans is assumed to return a new reference
		// owned by the caller -- confirm against the Lucy C API.
		C.DECREF(spans)
		C.DECREF(docVec)

		// The extracted value is not released here: it is assumed to be
		// owned by the hit document (NOTE(review): confirm). It is copied
		// into a Go string before the hit itself is released.
		value := ""
		if valueCB := C.LucyHitDocExtract(hit, getField, nil); valueCB != nil {
			value = C.GoString(cb_ptr2char(valueCB))
		}
		C.DECREF(hit)

		results = append(results, value)
	}
	return uint(len(results)), results
}
// Close releases the underlying Lucy searcher. It is safe to call more than
// once: the pointer is cleared after the first release so later calls do
// nothing instead of releasing an already-freed object.
func (ixReader *IndexReader) Close() {
	if ixReader.lucySearcher == nil {
		return
	}
	C.DECREF(ixReader.lucySearcher)
	ixReader.lucySearcher = nil
}
// Close releases the parsed Lucy query held by this Query. It is safe to
// call more than once.
//
// The schema pointer is dropped but not released: ParseQuery reads it from
// the searcher's schema accessor without taking a reference of its own, so
// releasing it here would decrement a reference count this Query never
// incremented.
// NOTE(review): relies on the searcher's Get_Schema returning a borrowed
// (non-incremented) reference -- confirm against the Lucy C API.
func (query *Query) Close() {
	if query.lucyQuery != nil {
		C.DECREF(query.lucyQuery)
		query.lucyQuery = nil
	}
	query.lucySchema = nil
}
// cb_newf returns a new Clownfish CharBuf holding the text of s. The caller
// owns the returned object and must release it with C.DECREF.
//
// The text is copied literally. Earlier this passed s to the C-side CB_newf
// wrapper as a pattern, so a '%' in an index path or field name could be
// read as a format directive with no matching argument. Building the buffer
// through cb_new_from_utf8 avoids that while keeping the same signature.
func cb_newf(s string) *C.CFishCharBuf {
	return cb_new_from_utf8(s)
}
// cb_new_from_utf8 builds a Clownfish CharBuf from the bytes of s, passing
// the byte length of s explicitly. The temporary C copy of the string is
// freed before returning; the caller owns the returned CharBuf.
func cb_new_from_utf8(s string) *C.CFishCharBuf {
	cStr := C.CString(s)
	buf := C.cfish_CB_new_from_utf8(cStr, C.size_t(len(s)))
	C.free(unsafe.Pointer(cStr))
	return buf
}
// cb_ptr2char returns the char pointer that the C helper cfish_cb_ptr2char
// yields for the given CharBuf.
func cb_ptr2char(field *C.CFishCharBuf) *C.char {
	ptr := C.cfish_cb_ptr2char(field)
	return ptr
}
// minUInt returns the smaller of x and y.
func minUInt(x, y uint) uint {
	if y <= x {
		return y
	}
	return x
}
@philipsoutham
Copy link
Author

From the devel branch of golucy

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment