Brian Tingle tingletech

Calisphere Search

The new index will be testing this indexing strategy. The same tokenizer and filters are run when indexing and on users' queries.

standard tokenizer

This tokenizer splits the text field into tokens, treating whitespace and punctuation as delimiters. Delimiter characters are discarded, with the following exceptions:

Periods (dots) that are not followed by whitespace are kept as part of the token, including Internet domain names.
The "@" character is among the set of token-splitting punctuation, so email addresses are not preserved as single tokens.

	<a href="http://calisphere.cdlib.org">
	<img
	src="http://calisphere.cdlib.org/calisphere_images/calisphere_beta_badge_horizontal.jpg"
	alt="horizontal Calisphere banner"
	border="0"/>
	</a>

	# -- coding: utf-8 --
	# python 3

	# print all unicode chars whose name contains "STAR"
	# 2014-04-14 by 馬曉駿 https://gist.github.com/10622337

	import unicodedata
	from pprint import pprint as pp

	symbols = []

	>>> unicodedata.name(u'Ω')
	'GREEK CAPITAL LETTER OMEGA'
	>>> unicodedata.name(u'Ω')
	'OHM SIGN'

	/**
	 * Echoes the incoming HTTP request parameters back to the caller as
	 * pretty-printed JSON.
	 *
	 * @param {Object} hook - Object passed to the exported handler.
	 * @param {*} hook.params - Incoming request parameters, such as query
	 *   string or form data. See http://hook.io/docs#data for more information.
	 * @param {Object} hook.res - Response object; this handler calls its
	 *   `write` and `end` methods.
	 */
	module['exports'] = function accessRequestData (hook) {
		var params = hook.params;
		// The replacer argument is null (no filtering); 2 is the indent width.
		hook.res.write(JSON.stringify(params, null, 2));
		// Finish by calling end() on the response after the single write.
		hook.res.end();
	};

	/**
	 * Echoes the incoming HTTP request parameters back to the caller as
	 * pretty-printed JSON.
	 *
	 * @param {Object} hook - Object passed to the exported handler.
	 * @param {*} hook.params - Incoming request parameters, such as query
	 *   string or form data. See http://hook.io/docs#data for more information.
	 * @param {Object} hook.res - Response object; this handler calls its
	 *   `write` and `end` methods.
	 */
	module['exports'] = function accessRequestData (hook) {
		var params = hook.params;
		// The replacer argument is null (no filtering); 2 is the indent width.
		hook.res.write(JSON.stringify(params, null, 2));
		// Finish by calling end() on the response after the single write.
		hook.res.end();
	};

	{
	"responseHeader":{
	"status":0,
	"QTime":112,
	"params":{
	"facet.query":"true",
	"q":"-reference_image_md5:[* TO *]",
	"facet.field":"collection_url",
	"indent":"true",
	"fq":"type_ss:\"image\"",

	{
	"label": "墨江老魚図",
	"structMap": [
	{
	"label": "section 1",
	"id": "4430a193-dba3-4701-8eb7-f53769a77449",
	"href": "https://nuxeo-stg.cdlib.org/Nuxeo/nxbigfile/default/4430a193-dba3-4701-8eb7-f53769a77449/file:content/AS134_S01_K.tif",
	"format": "image"
	},
	{

	<xsl:stylesheet
	version="1.0"
	xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
	xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
	xmlns:atom="http://www.w3.org/2005/Atom"
	xmlns:xlink="http://www.w3.org/1999/xlink">

	<!-- xslt for

	ucldc/registry -- load description and collection number into registry