-
-
Save jalchr/5326545 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System; | |
using System.Diagnostics; | |
using System.IO; | |
using System.Linq; | |
using Lucene.Net.Analysis; | |
using Lucene.Net.Analysis.Standard; | |
using Lucene.Net.Analysis.Tokenattributes; | |
using Raven.Abstractions.Indexing; | |
using Raven.Client; | |
using Raven.Client.Indexes; | |
using Raven.Tests.Helpers; | |
using Xunit; | |
namespace RavenTests | |
{ | |
public class NGramTest : RavenTestBase
{
    public class User
    {
        public string Name { get; set; }
    }

    public class UsersIndex : AbstractIndexCreationTask<User>
    {
        public UsersIndex()
        {
            // Map only the Name field; analyze it with the custom n-gram
            // analyzer so substring searches can match indexed terms.
            Map = users => from user in users
                           select new
                           {
                               user.Name
                           };
            Index(x => x.Name, FieldIndexing.Analyzed);
            Analyze(x => x.Name, typeof(NGramAnalyzer).AssemblyQualifiedName);
        }
    }

    [Fact]
    public void Test()
    {
        using (var documentStore = NewDocumentStore())
        {
            documentStore.ExecuteIndex(new UsersIndex());

            // Store a single document whose name should be reachable
            // through every one of the n-gram fragments below.
            using (var session = documentStore.OpenSession())
            {
                session.Store(new User { Name = "Matt Johnson" });
                session.SaveChanges();
            }

            WaitForIndexing(documentStore);

            using (var session = documentStore.OpenSession())
            {
                var searchValues = new[] { "ma", "mat", "att", "jo", "joh", "son" };
                var allPassed = true;

                foreach (var term in searchValues)
                {
                    var hits = session.Query<User, UsersIndex>().Search(x => x.Name, term).ToList();
                    var matched = hits.Count == 1;
                    Debug.WriteLine("\"{0}\" : {1}", term, matched ? "Pass" : "Fail");
                    allPassed &= matched;
                }

                Assert.True(allPassed);
            }
        }
    }
}
[NotForQuerying]
public class NGramAnalyzer : Analyzer
{
    /// <summary>
    /// Builds the analysis pipeline: standard tokenizer, standard filter,
    /// lower-casing, English stop-word removal, then n-grams of 2 to 6 chars.
    /// </summary>
    public override TokenStream TokenStream(string fieldName, TextReader reader)
    {
        var source = new StandardTokenizer(Lucene.Net.Util.Version.LUCENE_30, reader)
        {
            MaxTokenLength = 255
        };

        TokenStream stream = new StandardFilter(source);
        stream = new LowerCaseFilter(stream);
        // false: do not preserve position increments for removed stop words.
        stream = new StopFilter(false, stream, StandardAnalyzer.STOP_WORDS_SET);
        return new NGramTokenFilter(stream, 2, 6);
    }
}
/// <summary>
/// A <see cref="TokenFilter"/> that emits every character n-gram of each input
/// token, from <c>minGram</c> up to <c>maxGram</c> characters long. Grams are
/// produced smallest size first, left to right within each size (Lucene 3.x
/// ordering). Tokens shorter than <c>minGram</c> produce no output.
/// </summary>
public sealed class NGramTokenFilter : TokenFilter
{
    // CA2211: these were mutable "public static int" fields, which any caller
    // could reassign at runtime; const keeps them source-compatible for all
    // readers while preventing accidental reassignment.
    public const int DefaultMinNgramSize = 1;
    public const int DefaultMaxNgramSize = 2;

    private readonly int _maxGram;
    private readonly int _minGram;
    private readonly IOffsetAttribute _offsetAtt;
    private readonly ITermAttribute _termAtt;

    // Iteration state over the token currently being expanded into grams.
    private int _curGramSize;       // size of the grams currently being emitted
    private int _curPos;            // start offset of the next gram within the term
    private char[] _curTermBuffer;  // null => fetch the next token from the input
    private int _curTermLength;
    private int _tokStart;          // start offset of the current token in the source text

    /// <summary>
    /// Creates an NGramTokenFilter with the given min and max n-gram sizes.
    /// </summary>
    /// <param name="input"><see cref="TokenStream"/> holding the input to be tokenized</param>
    /// <param name="minGram">the smallest n-gram to generate; must be at least 1</param>
    /// <param name="maxGram">the largest n-gram to generate; must be &gt;= <paramref name="minGram"/></param>
    /// <exception cref="ArgumentException">when the size constraints are violated</exception>
    public NGramTokenFilter(TokenStream input, int minGram, int maxGram)
        : base(input)
    {
        if (minGram < 1)
        {
            throw new ArgumentException("minGram must be greater than zero", "minGram");
        }
        if (minGram > maxGram)
        {
            throw new ArgumentException("minGram must not be greater than maxGram", "minGram");
        }

        _minGram = minGram;
        _maxGram = maxGram;
        _termAtt = AddAttribute<ITermAttribute>();
        _offsetAtt = AddAttribute<IOffsetAttribute>();
    }

    /// <summary>
    /// Creates an NGramTokenFilter with the default min and max n-gram sizes
    /// (<see cref="DefaultMinNgramSize"/>, <see cref="DefaultMaxNgramSize"/>).
    /// </summary>
    /// <param name="input"><see cref="TokenStream"/> holding the input to be tokenized</param>
    public NGramTokenFilter(TokenStream input)
        : this(input, DefaultMinNgramSize, DefaultMaxNgramSize)
    {
    }

    /// <summary>
    /// Advances to the next n-gram, pulling a new token from the wrapped
    /// stream when the current one is exhausted.
    /// </summary>
    /// <returns>true if a gram was emitted; false at end of stream</returns>
    public override bool IncrementToken()
    {
        while (true)
        {
            // Fetch and snapshot the next source token when needed.
            if (_curTermBuffer == null)
            {
                if (!input.IncrementToken())
                {
                    return false; // end of the underlying stream
                }

                // Clone: the attribute's buffer is reused by upstream filters.
                _curTermBuffer = (char[])_termAtt.TermBuffer().Clone();
                _curTermLength = _termAtt.TermLength();
                _curGramSize = _minGram;
                _curPos = 0;
                _tokStart = _offsetAtt.StartOffset;
            }

            // Emit grams of the current size left-to-right, then grow the size.
            while (_curGramSize <= _maxGram)
            {
                if (_curPos + _curGramSize <= _curTermLength)
                {
                    ClearAttributes();
                    _termAtt.SetTermBuffer(_curTermBuffer, _curPos, _curGramSize);
                    _offsetAtt.SetOffset(_tokStart + _curPos, _tokStart + _curPos + _curGramSize);
                    _curPos++;
                    return true;
                }

                _curGramSize++; // no more grams of this size fit; try the next size
                _curPos = 0;
            }

            // Current token fully consumed; loop around to fetch the next one.
            _curTermBuffer = null;
        }
    }

    /// <summary>Resets the filter so the stream can be consumed again.</summary>
    public override void Reset()
    {
        base.Reset();
        _curTermBuffer = null; // forces IncrementToken to pull a fresh token
    }
}
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment