
Adrien Grand jpountz

  • Elasticsearch
  • Caen, France
@jpountz
jpountz / LZ4.java
Last active December 10, 2015 20:38
LZ4 compression and decompression
// Depending on your platform and your JVM, this method will pick the
// fastest LZ4Factory instance available
LZ4Factory factory = LZ4Factory.fastestInstance();
byte[] data = "12345345234572".getBytes("UTF-8");
final int decompressedLength = data.length;
// compress data
LZ4Compressor compressor = factory.fastCompressor();
// or factory.highCompressor() for slower compression but better compression ratio
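// (The preview stops here. A minimal sketch of how the round trip typically
// continues, following the lz4-java README; it assumes the net.jpountz.lz4
// classes LZ4Compressor and LZ4FastDecompressor.)
int maxCompressedLength = compressor.maxCompressedLength(decompressedLength);
byte[] compressed = new byte[maxCompressedLength];
// compressedLength is how many bytes of 'compressed' are actually used
int compressedLength = compressor.compress(data, 0, decompressedLength, compressed, 0, maxCompressedLength);

// decompress data: the fast decompressor needs the exact decompressed length up front
LZ4FastDecompressor decompressor = factory.fastDecompressor();
byte[] restored = new byte[decompressedLength];
decompressor.decompress(compressed, 0, restored, 0, decompressedLength);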
@jpountz
jpountz / BlockXXHash.java
Last active December 10, 2015 20:38
xxhash block hashing
XXHashFactory factory = XXHashFactory.fastestInstance();
byte[] data = "12345345234572".getBytes("UTF-8");
XXHash32 hash32 = factory.hash32();
int seed = 0x9747b28c; // seed used to initialize the hash value; use whatever
// value you want, but always the same one
int hash = hash32.hash(data, 0, data.length, seed);
System.out.println("Block hash: " + hash);
@jpountz
jpountz / StreamingXXHash.java
Last active December 10, 2015 20:38
xxhash streaming hashing
XXHashFactory factory = XXHashFactory.fastestInstance();
byte[] data = "12345345234572".getBytes("UTF-8");
ByteArrayInputStream in = new ByteArrayInputStream(data);
int seed = 0x9747b28c; // seed used to initialize the hash value; use whatever
// value you want, but always the same one
StreamingXXHash32 hash32 = factory.newStreamingHash32(seed);
byte[] buf = new byte[8]; // for real-world usage, use a larger buffer, like 8192 bytes
for (;;) {
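// (Preview stops at the loop header. A sketch of the usual continuation,
// assuming the net.jpountz.xxhash streaming API: update() feeds bytes in,
// getValue() returns the hash of everything consumed so far.)
  int read = in.read(buf);
  if (read == -1) {
    break;
  }
  hash32.update(buf, 0, read);
}
int hash = hash32.getValue();
System.out.println("Streaming hash: " + hash);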
// Both arrays must have the same length
// scores[i] is the score of objects[i]
final Object[] objects = ...;
final float[] scores = ...;
new IntroSorter() {
float pivotScore;
@Override
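// (Preview ends mid-class. A sketch of the remaining callbacks, assuming
// org.apache.lucene.util.IntroSorter's contract: setPivot/comparePivot plus
// the compare/swap pair inherited from Sorter, then sort(from, to).)
protected void setPivot(int i) {
  pivotScore = scores[i];
}

@Override
protected int comparePivot(int j) {
  return Float.compare(pivotScore, scores[j]);
}

@Override
protected int compare(int i, int j) {
  return Float.compare(scores[i], scores[j]);
}

@Override
protected void swap(int i, int j) {
  final Object tmpObj = objects[i];
  objects[i] = objects[j];
  objects[j] = tmpObj;
  final float tmpScore = scores[i];
  scores[i] = scores[j];
  scores[j] = tmpScore;
}
}.sort(0, objects.length);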
@jpountz
jpountz / gist:6165557
Created August 6, 2013 15:31
Light French stemmer with ASCII folding
{
"settings": {
"analysis": {
"analyzer": {
"french2": {
"type": "custom",
"tokenizer": "standard",
"filter": ["standard","asciifolding","elision","lowercase","stop_fr","light_french_stem"]
}
},
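The preview stops after the analyzer itself; the settings presumably go on to define the custom filters it references. A sketch of what such definitions typically look like (the exact elision articles and the use of the built-in "_french_" stopword list are assumptions, not taken from the gist):

"filter": {
  "elision": { "type": "elision", "articles": ["l", "m", "t", "qu", "n", "s", "j"] },
  "stop_fr": { "type": "stop", "stopwords": ["_french_"] },
  "light_french_stem": { "type": "stemmer", "name": "light_french" }
}
}
}
}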
@jpountz
jpountz / Recover.java
Last active December 22, 2015 10:48
Restores documents from a corrupted segment, as long as its stored fields are not corrupted.
// Set codec, dir and segmentName to match the segment you are trying to restore
Codec codec = new Lucene42Codec();
Directory dir = FSDirectory.open(new File("/tmp/test"));
String segmentName = "_0";
IOContext ioContext = new IOContext();
SegmentInfo segmentInfos = codec.segmentInfoFormat().getSegmentInfoReader().read(dir, segmentName, ioContext);
Directory segmentDir;
if (segmentInfos.getUseCompoundFile()) {
segmentDir = new CompoundFileDirectory(dir, IndexFileNames.segmentFileName(segmentName, "", IndexFileNames.COMPOUND_FILE_EXTENSION), ioContext, false);
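} else {
  segmentDir = dir;
}
// From here on, the idea is to read every document back through the codec's
// stored-fields reader and feed it into a fresh index. A rough sketch against
// the Lucene 4.x codec APIs (signatures recalled from memory; double-check them
// against your Lucene version, and note that /tmp/restored and KeywordAnalyzer
// are just example choices):
FieldInfos fieldInfos = codec.fieldInfosFormat().getFieldInfosReader().read(segmentDir, segmentName, ioContext);
StoredFieldsReader fieldsReader = codec.storedFieldsFormat().fieldsReader(segmentDir, segmentInfos, fieldInfos, ioContext);
IndexWriter writer = new IndexWriter(FSDirectory.open(new File("/tmp/restored")),
    new IndexWriterConfig(Version.LUCENE_42, new KeywordAnalyzer()));
for (int i = 0; i < segmentInfos.getDocCount(); ++i) {
  DocumentStoredFieldVisitor visitor = new DocumentStoredFieldVisitor();
  fieldsReader.visitDocument(i, visitor);
  writer.addDocument(visitor.getDocument());
}
fieldsReader.close();
writer.close();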
curl -XDELETE localhost:9200/test?pretty
echo
curl -XPUT localhost:9200/test?pretty -d '{
"settings": {
"analysis" : {
"analyzer" : {
"str_search_analyzer" : {
"tokenizer" : "whitespace",
"filter" : ["lowercase"]
@jpountz
jpountz / highlighting_of_stopwords.sh
Created December 16, 2013 10:24
Highlighting of stop words
curl -XDELETE 'localhost:9200/test?pretty'
echo
curl -XPUT 'localhost:9200/test?pretty' -d '
{
"settings" : {
"index" : {
"analysis" : {
"analyzer" : {
"default" : {
@jpountz
jpountz / aggs_filter.json
Created December 16, 2013 12:16
aggs_filter.json
curl -XGET "http://localhost:9200/movies/_search" -d'
{
"query": {
"nested": {
"path": "credits",
"query": {
"match": {
"credits.person_id": 1
}
}
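The preview cuts off inside the query. Given the gist's name, it presumably pairs the query with an aggregation over the same nested documents; a guessed continuation (the "credits.role" field is an assumption) could look like:

}
},
"aggs": {
  "credits": {
    "nested": { "path": "credits" },
    "aggs": {
      "roles": {
        "terms": { "field": "credits.role" }
      }
    }
  }
}
}'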