Whoosh's default analyzer does not handle CJK text (in particular Chinese and Japanese) well. If you feed it a typical Chinese or Japanese paragraph, you'll often find an entire sentence treated as a single token.
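A quick check makes the problem concrete (the sample sentence is arbitrary):

```python
from whoosh.analysis import StandardAnalyzer

# The default analyzer splits on runs of "word" characters, and a run of
# CJK ideographs with no spaces in between counts as a single run, so the
# whole sentence comes back as one token.
analyzer = StandardAnalyzer()
print([token.text for token in analyzer("我们都是好孩子")])
# ['我们都是好孩子']
```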
A Whoosh analyzer consists of one tokenizer and zero or more filters. That means we can easily adapt this recipe from Lucene's CJKAnalyzer:
> An Analyzer that tokenizes text with `StandardTokenizer`, normalizes content with `CJKWidthFilter`, folds case with `LowerCaseFilter`, forms bigrams of CJK with `CJKBigramFilter`, and filters stopwords with `StopFilter`
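In Whoosh, an equivalent pipeline is assembled by chaining a tokenizer and filters with the `|` operator. A minimal sketch of the chain (Whoosh ships the tokenizer, lowercasing, and stopword pieces; it has no built-in width normalization or CJK bigram filter, which is the part we need to supply):

```python
from whoosh.analysis import RegexTokenizer, LowercaseFilter, StopFilter

# A Whoosh analyzer is a tokenizer chained with filters via |;
# the composite object is itself an analyzer.
analyzer = RegexTokenizer() | LowercaseFilter() | StopFilter()

# Once the CJK bigram filter below exists, it slots into the chain:
# analyzer = RegexTokenizer() | CJKFilter() | LowercaseFilter() | StopFilter()
```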
That recipe inspired my first take:
from whoosh.analysis import Filter

class CJKFilter(Filter):
    def __call__(self, tokens):