ncruces · June 7, 2018 10:40
diff --git a/SearchIterator.java b/SearchIterator.java
 /*
 *******************************************************************************
 * Copyright (C) 1996-2000, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/SearchIterator.java,v $ 
 * $Date: 2002/04/03 19:13:56 $ 
 * $Revision: 1.6 $
 *
 *****************************************************************************************
 */

 package com.ibm.icu.text;

 import java.text.BreakIterator;
 import java.text.CharacterIterator;

 /**
 * <code>SearchIterator</code> is an abstract base class that provides methods
 * to search for a pattern within a text string.  Instances of
 * <code>SearchIterator</code> maintain a current position and scan over
 * the target text, returning the indices the pattern is matched
 * and the length of each match.
 * <p>
 * <code>SearchIterator</code> is an abstract base class that defines a
 * protocol for text searching.  Subclasses provide concrete implementations of
 * various search algorithms.  For example, {@link StringSearch}
 * implements language-sensitive pattern matching based on the comparison rules
 * defined in a {@link java.text.RuleBasedCollator RuleBasedCollator} object.
 * <p>
 * Internally, <code>SearchIterator</code> scans text using a
 * {@link CharacterIterator}, and is thus able to scan text held
 * by any object implementing that protocol. A <code>StringCharacterIterator</code>
 * is used to scan <code>String</code> objects passed to <code>setText</code>.
 * <p>
 * <code>SearchIterator</code> provides an API that is similar to that of
 * other text iteration classes such as <code>BreakIterator</code>.  Using this
 * class, it is easy to scan through text looking for all occurances of a
 * given pattern.  The following example uses a <code>StringSearch</code> object to
 * find all instances of "fox" in the target string.  Any other subclass of
 * <code>SearchIterator</code> can be used in an identical manner.
 * <pre><code>
 * String target = "The quick brown fox jumped over the lazy fox";
 * String pattern = "fox";
 *
 * SearchIterator iter = new StringSearch(pattern, target);
 *
 * for (int pos = iter.first(); pos != SearchIterator.DONE; pos = iter.next()) {
 *     System.out.println("Found match at " + pos +
 *                        ", length is " + iter.getMatchLength());
 * }
 * </code></pre>
 *
 * @see StringSearch
 */
 public abstract class SearchIterator {
    /**
     * DONE is returned by previous() and next() after all valid
     * matches have been returned, and by first() and last() if
     * there are no matches at all.
     */
    public static final int DONE = -1;
    
    /**
     * Private value indicating that the iterator is pointing
     * before the beginning of the target text.
     */
    private static final int BEFORE = -2;

    /**
     * Return the first index at which the target text matches the search
     * pattern.  The iterator is adjusted so that its current index
     * (as returned by {@link #getIndex}) is the match posisition if one was found
     * and <code>DONE</code> if one was not.
     *
     * @return The character index of the first match, or <code>DONE</code> if there
     *          are no matches.
     */
    final public int first() {
        setIndex(BEFORE);
        return next();
    }

    /**
     * Return the first index greater than <tt>pos</tt> at which the target
     * text matches the search pattern.   The iterator is adjusted so that its current index
     * (as returned by {@link #getIndex}) is the match posisition if one was found
     * and <code>DONE</code> if one was not.
     *
     * @return The character index of the first match following <code>pos</code>,
     *          or <tt>DONE</tt> if there are no matches.
     */
    final public int following(int pos) {
        setIndex(pos);
        return next();
    }
    
    /**
     * Return the last index in the target text at which it matches
     * the search pattern and adjusts the iteration to point to that position.
     *
     * @return The index of the first match, or <tt>DONE</tt> if there
     *          are no matches.
     */
    final public int last() {
        setIndex(DONE);
        return previous();
    }

    /**
     * Return the first index less than <code>pos</code> at which the target
     * text matches the search pattern.   The iterator is adjusted so that its current index
     * (as returned by {@link #getIndex}) is the match posisition if one was found
     * and <tt>DONE</tt> if one was not.
     *
     * @return The character index of the first match preceding <code>pos</code>,
     *          or <code>DONE</code> if there are no matches.
     */
    final public int preceding(int pos) {
        setIndex(pos);
        return previous();
    }
    
    /**
     * Return the index of the next point at which the text matches the
     * search pattern, starting from the current position.
     * @return The index of the next match after the current position,
     *          or <code>DONE</code> if there are no more matches.
     *
     * @see #first
     */
    public int next() {
        if (index == BEFORE){
            // Starting at the beginning of the text
            index = target.getBeginIndex();
        } else if (length > 0) {
            // Finding the next match after a previous one
            index += overlap ? 1 : length;
        }
        index -= 1;
        
        do {
            length = 0;
            index = handleNext(index + 1);
        } while (index != DONE && !isBreakUnit(index, index+length));
        
        return index;
    }

    /**
     * Return the index of the previous point at which the text matches
     * the search pattern, starting at the current position
     *
     * @return The index of the previous match before the current position,
     *          or <code>DONE</code> if there are no more matches.
     */
    public int previous() {
        if (index == DONE) {
            index = target.getEndIndex();
        } else if (length > 0) {
            // Finding the previous match before a following one
            index = overlap ? index + length - 1 : index;
        }
        index += 1;
        
        do {
            length = 0;
            index = handlePrev(index - 1);
        } while (index != DONE && !isBreakUnit(index, index+length));

        if (index == DONE) {
            index = BEFORE;
        }
        return getIndex();
    }



    /**
     * Return the current index in the text being searched.
     * If the iteration has gone past the end of the text
     * (or past the beginning for a backwards search), 
     * {@link #DONE} is returned.
     */
    public int getIndex() {
        return index == BEFORE ? DONE : index;
    }

    /**
     * Determines whether overlapping matches are returned.  If this
     * property is <code>true</code>, matches that begin within the
     * boundry of the previous match are considered valid and will
     * be returned.  For example, when searching for "abab" in the
     * target text "ababab", both offsets 0 and 2 will be returned
     * as valid matches if this property is <code>true</code>.
     * <p>
     * The default setting of this property is <tt>true</tt>
     */
    public void setOverlapping(boolean allowOverlap) {
        overlap = allowOverlap;
    }
    
    /**
     * Determines whether overlapping matches are returned.
     *
     * @see #setOverlapping
     */
    public boolean isOverlapping() {
        return overlap;
    }
    
    /**
     * Returns the length of text in the target which matches the search
     * pattern.  This call returns a valid result only after a successful
     * call to {@link #first}, {@link #next}, {@link #previous}, or {@link #last}.
     * Just after construction, or after a searching method returns
     * <tt>DONE</tt>, this method will return 0.
     *
     * @return The length of the match in the target text, or 0 if there
     *          is no match currently.
     */
    public int getMatchLength() {
        return length;
    }

    /**
     * Set the BreakIterator that will be used to restrict the points
     * at which matches are detected.
     *
     * @param iterator  A {@link java.text.BreakIterator BreakIterator}
     *                  that will be used to restrict the points
     *                  at which matches are detected.  If a match is found, but the match's start
     *                  or end index is not a boundary as determined by
     *                  the <tt>BreakIterator</tt>, the match will be rejected and
     *                  another will be searched for.
     *
     *                  If this parameter is <tt>null</tt>, no break
     *                  detection is attempted.
     *
     * @see #getBreakIterator
     */
    public void setBreakIterator(BreakIterator iterator) {
        breaker = iterator;
        if (breaker != null) {
            breaker.setText(target);
        }
    }
    
    /**
     * Returns the BreakIterator that is used to restrict the points
     * at which matches are detected.  This will be the same object
     * that was passed to the constructor or to <code>setBreakIterator</code>.
     * Note that <tt>null</tt> is a legal value; it means that break
     * detection should not be attempted.
     *
     * @see #setBreakIterator
     */
    public BreakIterator getBreakIterator() {
        return breaker;
    }
    
    /**
     * Set the target text which should be searched and resets the
     * iterator's position to point before the start of the target text.
     * This method is useful if you want to re-use an iterator to
     * search for the same pattern within a different body of text.
     *
     * @see #getTarget
     */
    public void setTarget(CharacterIterator iterator) {
        target = iterator;
        if (breaker != null) {
            breaker.setText(target);
        }
        setIndex(BEFORE);
    }
    
    /**
     * Return the target text which is being searched
     *
     * @see #setTarget
     */
    public CharacterIterator getTarget() {
        return target;
    }
    
    /**
     * Returns the text that was matched by the most recent call to 
     * {@link #first}, {@link #next}, {@link #previous}, or {@link #last}.
     * If the iterator is not pointing at a valid match (e.g. just after
     * construction or after <tt>DONE</tt> has been returned, returns
     * an empty string.
     */
    public String getMatchedText() {
        StringBuffer buffer = new StringBuffer();
        
        if (length > 0) {
            int i = 0;
            for (char c = target.setIndex(index); i < length; c = target.next(), i++)
            {
                buffer.append(c);
            }
        }
        return buffer.toString();
    }

    //-------------------------------------------------------------------
    // Protected interface for subclasses
    //-------------------------------------------------------------------

    /**
     * Constructor for use by subclasses.
     * <p>
     * @param target    The target text to be searched.  This is for internal
     *                  use by this class.  Subclasses need to maintain their
     *                  own reference to or iterator over the target text
     *                  for use by their {@link #handleNext handleNext} and
     *                  {@link #handlePrev handlePrev} methods.
     *
     * @param breaker   A {@link BreakIterator} that is used to restrict the points
     *                  at which matches are detected.  If <tt>handleNext</tt> or
     *                  <tt>handlePrev</tt> finds a match, but the match's start
     *                  or end index is not a boundary as determined by
     *                  the <tt>BreakIterator</tt>, the match is rejected and 
     *                  <tt>handleNext</tt> or <tt>handlePrev</tt> is called again.
     *                  If this parameter is <tt>null</tt>, no break
     *                  detection is attempted.
     *                  
     */
    protected SearchIterator(CharacterIterator target, BreakIterator breaker)
    {
        this.target = target;
        
        if (breaker != null) {
            this.breaker = (BreakIterator)breaker.clone();
            this.breaker.setText(target);
        }
        
        index = target.getBeginIndex();
        length = 0;
    }

    /**
     * Abstract method which subclasses override to provide the mechanism
     * for finding the next match in the target text.  This allows different
     * subclasses to provide different search algorithms.
     * <p>
     * If a match is found, the implementation should return the index at
     * which the match starts and should call {@link #setMatchLength setMatchLength}
     * with the number of characters in the target
     * text that make up the match.  If no match is found, the method
     * should return DONE and should not call <tt>setMatchLength</tt>.
     * <p>
     * @param startAt   The index in the target text at which the search
     *                  should start.
     *
     * @see #setMatchLength
     */
    protected abstract int handleNext(int startAt);

    /**
     * Abstract method which subclasses override to provide the mechanism
     * for finding the previous match in the target text.  This allows different
     * subclasses to provide different search algorithms.
     * <p>
     * If a match is found, the implementation should return the index at
     * which the match starts and should call {@link #setMatchLength setMatchLength}
     * with the number of characters in the target
     * text that make up the match.  If no match is found, the method
     * should return DONE and should not call <tt>setMatchLength</tt>.
     * <p>
     * @param startAt   The index in the target text at which the search
     *                  should start.
     *
     * @see #setMatchLength
     */
    protected abstract int handlePrev(int startAt);

    /**
     * Sets the length of the currently matched string in the target text.
     * Subclasses' <code>handleNext</code> and <code>handlePrev</code>
     * methods should call this when they find a match in the target text.
     */
    protected void setMatchLength(int length) {
        this.length = length;
    }

    //-------------------------------------------------------------------
    // Privates
    //
    
    /**
     * Internal method used by preceding and following.  Sets the index
     * to point to the given position, and clears any state that's
     * affected.
     */
    private void setIndex(int pos) {
        index = pos;
        length = 0;
    }
    
    /**
     * Determine whether the target text bounded by <code>start</code> and
     * <code>end</code> is one or more whole units of text as determined by
     * the current <code>BreakIterator</code>.
     */
    private boolean isBreakUnit(int start, int end)
    {
        if (breaker == null) {
            return true;
        } 
        boolean startBound = breaker.isBoundary(start);
        boolean endBound = (end == target.getEndIndex()) || breaker.isBoundary(end);
        
        return startBound && endBound;
    }
    
    //-------------------------------------------------------------------------
    // Private data...
    //-------------------------------------------------------------------------
    private int                 index;          // Current position in the target text
    private int                 length;         // Length of matched text, or 0
    private boolean             overlap = true; // Return overlapping matches?
    private CharacterIterator   target;         // Target text to be searched
    private BreakIterator       breaker;        // Break iterator to constrain matches
 };
diff --git a/StringSearch.java b/StringSearch.java
 /*
 *******************************************************************************
 * Copyright (C) 1996-2000, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/StringSearch.java,v $ 
 * $Date: 2002/03/20 05:11:16 $ 
 * $Revision: 1.6 $
 *
 *****************************************************************************************
 */

 package com.ibm.icu.text;

 import java.text.BreakIterator;
 import java.text.CharacterIterator;
 import java.text.CollationElementIterator;
 import java.text.Collator;
 import java.text.RuleBasedCollator;
 import java.text.StringCharacterIterator;
 import java.util.Locale;

 /**
 * <code>StringSearch</code> is a <code>SearchIterator</code> that provides
 * language-sensitive text searching based on the comparison rules defined
 * in a {@link RuleBasedCollator} object.
 * Instances of <code>StringSearch</code> function as iterators
 * maintain a current position and scan over text returning the index of
 * characters where the pattern occurs and the length of each match.
 * <p>
 * <code>StringSearch</code> uses a version of the fast Boyer-Moore search
 * algorithm that has been adapted to work with the large character set of
 * Unicode.  See "Efficient Text Searching in Java", to be published in
 * <i>Java Report</i> in February, 1999, for further information on the algorithm.
 * <p>
 * Consult the <code>SearchIterator</code> documentation for information on
 * and examples of how to use instances of this class to implement text
 * searching.  <code>SearchIterator</code> provides all of the necessary
 * API; this class only provides constructors and internal implementation
 * methods.
 *
 * @see SearchIterator
 * @see java.text.RuleBasedCollator
 *
 * @author Laura Werner
 * @version 1.0
 */
 public final class StringSearch extends SearchIterator
 {
    /**
     * Construct a <code>StringSearch</code> object using a specific collator and set
     * of boundary-detection rules.
     * <p>
     * @param pat       The text for which this object will search.
     *
     * @param target    The text in which to search for the pattern.
     *
     * @param coll      A <code>RuleBasedCollator</code> object which defines the
     *                  language-sensitive comparison rules used to determine 
     *                  whether text in the pattern and target matches.
     *
     * @param breaker   A <code>BreakIterator</code> object used to constrain the matches
     *                  that are found.  Matches whose start and end indices
     *                  in the target text are not boundaries as determined
     *                  by the <code>BreakIterator</code> are ignored.  If this behavior
     *                  is not desired, <code>null</code> can be passed in instead.
     */
    public StringSearch(String pat, CharacterIterator target,
                            RuleBasedCollator coll, BreakIterator breaker) {
        super(target, breaker);

        pattern = pat;
        collator = coll;
        strength = coll.getStrength();
        target.setIndex(target.getBeginIndex());
        iter = collator.getCollationElementIterator(target);
        
        initialize();   // Initialize the Boyer-Moore tables
    }

    /**
     * Construct a <code>StringSearch</code> object using a specific collator.
     * <p>
     * @param pattern   The text for which this object will search.
     *
     * @param target    The text in which to search for the pattern.
     *
     * @param collator  A <code>RuleBasedCollator</code> object which defines the
     *                  language-sensitive comparison rules used to determine 
     *                  whether text in the pattern and target matches.
     */
    public StringSearch(String pattern,
                            CharacterIterator target,
                            RuleBasedCollator collator) {
        this(pattern, target, collator, BreakIterator.getCharacterInstance());
    }

    /**
     * Construct a <code>StringSearch</code> object using the collator and
     * character boundary detection rules for a given locale.
     * @param pattern   The text for which this object will search.
     *
     * @param target    The text in which to search for the pattern.
     *
     * @param loc       The locale whose collation and break-detection rules
     *                  should be used.
     *
     * @exception       ClassCastException thrown if the collator for the specified
     *                  locale is not a RuleBasedCollator.
     */
    public StringSearch(String pattern, CharacterIterator target, Locale loc) {
        this(pattern, target,
             (RuleBasedCollator) Collator.getInstance(loc),
             BreakIterator.getCharacterInstance(loc));
    }

    /**
     * Construct a <code>StringSearch</code> object using the collator for the default
     * locale.
     * @param pattern   The text for which this object will search.
     *
     * @param target    The text in which to search for the pattern.
     */
    public StringSearch(String pattern, String target) {
        this(pattern,
             new StringCharacterIterator(target),
             (RuleBasedCollator)Collator.getInstance(),
             BreakIterator.getCharacterInstance());
    }

    //-------------------------------------------------------------------
    // Getters and Setters
    //-------------------------------------------------------------------
    
    /**
     * Sets this object's strength property. The strength determines the
     * minimum level of difference considered significant during a
     * search.  Generally, {@link Collator#TERTIARY} and 
     * {@link Collator#IDENTICAL} indicate that all differences are
     * considered significant, {@link Collator#SECONDARY} indicates
     * that upper/lower case distinctions should be ignored, and
     * {@link Collator#PRIMARY} indicates that both case and accents
     * should be ignored.  However, the exact meanings of these constants
     * are determined by individual Collator objects.
     * <p>
     * @see java.text.Collator#PRIMARY
     * @see java.text.Collator#SECONDARY
     * @see java.text.Collator#TERTIARY
     * @see java.text.Collator#IDENTICAL
     */
    public void setStrength(int newStrength) {
        strength = newStrength;
        
        // Due to a bug (?) in CollationElementIterator, we must set the
        // collator's strength as well, since the iterator is going to
        // mask out the portions of the collation element that are not
        // relevant for the collator's current strength setting
        // Note that this makes it impossible to share a Collator among
        // multiple StringSearch objects if you adjust Strength settings.
        collator.setStrength(strength);
        initialize();
    }
    
    
    /**
     * Returns this object's strength property, which indicates what level
     * of differences are considered significant during a search.
     * <p>
     * @see #setStrength
     */
    public int getStrength() {
        return strength;
    }
    
    /**
     * Set the collator to be used for this string search.  Also changes
     * the search strength to match that of the new collator.
     * <p>
     * This method causes internal data such as Boyer-Moore shift tables
     * to be recalculated, but the iterator's position is unchanged.
     * <p>
     * @see #getCollator
     */
    public void setCollator(RuleBasedCollator coll) {
        collator = coll;
        strength = collator.getStrength();
        
        // Also need to recompute the pattern and get a new target iterator
        CharacterIterator target = getTarget();
        target.setIndex(target.getBeginIndex());
        iter = collator.getCollationElementIterator(target);
        initialize();
    }
    
    /**
     * Return the RuleBasedCollator being used for this string search.
     */
    public RuleBasedCollator getCollator() {
        return collator;
    }
    
    /**
     * Set the pattern for which to search.  
     * This method causes internal data such as Boyer-Moore shift tables
     * to be recalculated, but the iterator's position is unchanged.
     */
    public void setPattern(String pat) {
        pattern = pat;
        initialize();
    }
    
    /**
     * Returns the pattern for which this object is searching.
     */
    public String getPattern() {
        return pattern;
    }
    
    /**
     * Set the target text which should be searched and resets the
     * iterator's position to point before the start of the new text.
     * This method is useful if you want to re-use an iterator to
     * search for the same pattern within a different body of text.
     */
    public void setTarget(CharacterIterator target) {
        super.setTarget(target);
        
        // Since we're caching a CollationElementIterator, recreate it
        target.setIndex(target.getBeginIndex());
        iter = collator.getCollationElementIterator(target);
    }

    //-------------------------------------------------------------------
    // Privates
    //-------------------------------------------------------------------

    /**
     * Search forward for matching text, starting at a given location.
     * Clients should not call this method directly; instead they should call
     * {@link SearchIterator#next}.
     * <p>
     * If a match is found, this method returns the index at which the match
     * starts and calls {@link SearchIterator#setMatchLength}
     * with the number of characters in the target
     * text that make up the match.  If no match is found, the method returns
     * <code>DONE</code> and does not call <tt>setMatchLength</tt>.
     * <p>
     * @param start The index in the target text at which the search starts.
     *
     * @return      The index at which the matched text in the target starts, or DONE
     *              if no match was found.
     * <p>
     * @see SearchIterator#next
     * @see SearchIterator#DONE
     */
    protected int handleNext(int start)
    {
        CharacterIterator target = getTarget();
        
        int mask = getMask(strength);
        int done = CollationElementIterator.NULLORDER & mask;
        
        if (DEBUG) {
            debug("-------------------------handleNext-----------------------------------");
            debug("");
            debug("strength=" + strength + ", mask=" + Integer.toString(mask,16)
                + ", done=" + Integer.toString(done,16));
            debug("decomp=" + collator.getDecomposition());
            
            debug("target.begin=" + getTarget().getBeginIndex());
            debug("target.end=" + getTarget().getEndIndex());
            debug("start = " + start);
        }
        
        int index = start + minLen;
        int matchEnd = 0;

        while (index <= target.getEndIndex())
        {
            int patIndex = normLen;
            int tval = 0, pval = 0;
            boolean getP = true;
            
            iter.setOffset(index);
            matchEnd = index;
            
            if (DEBUG) debug(" outer loop: patIndex=" + patIndex + ", index=" + index + ", iter offset= " + iter.getOffset());
            
            while ((patIndex > 0 || getP == false) && iter.getOffset() > start)
            {
                if (DEBUG) {
                    debug("  inner loop: patIndex=" + patIndex + " iter=" + iter.getOffset());
                    debug("   getP=" + getP);
                }
                
                // Get the previous character in both the pattern and the target
                tval = iter.previous() & mask;
                
                if (getP) pval = valueList[--patIndex];
                getP = true;
                
                if (DEBUG) debug("   pval=" + Integer.toString(pval,16) + ", tval=" + Integer.toString(tval,16));
                
                if (tval == 0) {       // skip tval, use same pval
                    if (DEBUG) debug("   tval is ignorable");
                    getP = false;    
                }
                else if (pval != tval) {    // Mismatch, skip ahead
                    if (DEBUG) debug("   mismatch: skippping " + getShift(tval, patIndex));
                    
                    index += getShift(tval, patIndex);
                    break;
                }
                else if (patIndex == 0) {
                    // The values matched, and we're at the beginning of the pattern,
                    // which means we matched the whole thing.
                    start = iter.getOffset();
                    setMatchLength(matchEnd - start);
                    if (DEBUG) debug("Found match at index "+ start );
                    return start;
                }
            }
            if (DEBUG) {
 				debug(" end of inner loop: patIndex=" + patIndex + " iter=" + iter.getOffset());
 				debug("   getP=" + getP);
 			}
            
            if (index == matchEnd) {
                // We hit the beginning of the text being searched, which is
                // possible if it contains lots of ignorable characters.
                // Advance one character and try again.
                if (DEBUG) debug("hit beginning of target; advance by one");
                index++;
            }
        }
        if (DEBUG) debug("Fell off end of outer loop; returning DONE");
        return DONE;
    }

    /**
     * Search backward for matching text ,starting at a given location.
     * Clients should not call this method directly; instead they should call
     * <code>SearchIterator.previous()</code>, which this method overrides.
     * <p>
     * If a match is found, this method returns the index at which the match
     * starts and calls {@link SearchIterator#setMatchLength}
     * with the number of characters in the target
     * text that make up the match.  If no match is found, the method returns
     * <code>DONE</code> and does not call <tt>setMatchLength</tt>.
     * <p>
     * @param start The index in the target text at which the search starts.
     *
     * @return      The index at which the matched text in the target starts, or DONE
     *              if no match was found.
     * <p>
     * @see SearchIterator#previous
     * @see SearchIterator#DONE
     */
    protected int handlePrev(int start)
    {
        int patLen = normLen;
        int index = start - minLen;

        int mask = getMask(strength);
        int done = CollationElementIterator.NULLORDER & mask;

        if (DEBUG) {
            debug("-------------------------handlePrev-----------------------------------");
            debug("");
            debug("strength=" + strength + ", mask=" + Integer.toString(mask,16)
                + ", done=" + Integer.toString(done,16));
            debug("decomp=" + collator.getDecomposition());
            
            debug("target.begin=" + getTarget().getBeginIndex());
            debug("target.end=" + getTarget().getEndIndex());
            debug("start = " + start);
        }
        
        while (index >= 0) {
            int patIndex = 0;
            int tval = 0, pval = 0;
            boolean getP = true;

            iter.setOffset(index);

            if (DEBUG) debug(" outer loop: patIndex=" + patIndex + ", index=" + index);
            
            while ((patIndex < patLen || !getP) && iter.getOffset() < start)
            {
                if (DEBUG) {
                    debug("  inner loop: patIndex=" + patIndex + " iter=" + iter.getOffset());
                    debug("   getP=" + getP);
                }
                tval = iter.next() & mask;
                if (getP) pval = valueList[patIndex++];
                getP = true;

                if (DEBUG) debug("   pval=" + Integer.toString(pval,16) + ", tval=" + Integer.toString(tval,16));

                if (tval == done) {
                    if (DEBUG) debug("   end of target; no match");
                    return DONE;
                }
                else if (tval == 0) {
                    if (DEBUG) debug("   tval is ignorable");
                    getP = false;
                }
                else if (pval != tval) {
                    // We didn't match this pattern.  Skip ahead
                    if (DEBUG) debug("   mismatch: skippping " + getBackShift(tval, patIndex));
                    
                    int shift = getBackShift(tval, patIndex);
                    index -= shift;
                    break;
                }
                else if (patIndex == patLen) {
                    // The elements matched and we're at the end of the pattern,
                    // which means we matched the whole thing.
                    setMatchLength(iter.getOffset() - index);
                    if (DEBUG) debug("Found match at index "+ start );
                    return index;
                }
            }
            if (DEBUG) {
 				debug(" end of inner loop: patIndex=" + patIndex + " iter=" + iter.getOffset());
 				debug("   getP=" + getP);
 			}
            if (iter.getOffset() >= start) {
                // We hit the end of the text being searched, which is
                // possible if it contains lots of ignorable characters.
                // Back up one character and try again.
                if (DEBUG) debug("hit end of target; back by one");
                index--;
            }
        }
        if (DEBUG) debug("Fell off end of outer loop; returning DONE");
        return DONE;
    }

    /**
     * Return a bitmask that will select only the portions of a collation 
     * element that are significant at the given strength level.
     */
    private static final int getMask(int strength) {
        switch (strength) {
            case Collator.PRIMARY:
                return 0xFFFF0000;
            case Collator.SECONDARY:
                return 0xFFFFFF00;
            default:
                return 0xFFFFFFFF;
        }
    }
    

    //------------------------------------------------------------------------
    // Private Data
    //
    private CollationElementIterator    iter;
    private RuleBasedCollator           collator;
    private int                         strength;
    
    //------------------------------------------------------------------------
    // Everything from here on down is the data used to represent the
    // Boyer-Moore shift tables and the code that generates and manipulates
    // them.
    //
    private static final int MAX_TABLE = 256;        // Size of the shift tables
    
    private int     valueList[] = null;
    private int     shiftTable[] = new int[MAX_TABLE];
    private int     backShiftTable[] = new int[MAX_TABLE];

    private String  pattern;            // The pattern string
    private int     normLen = 0;        // num. of collation elements in pattern.
    private int     minLen = 0;         // Min of composed, decomposed versions
    private int     maxLen = 0;         // Max

    private void initialize() {
        if (DEBUG)  {
            debug("-------------------------initialize-----------------------------------");
            debug("pattern=" + pattern);
        }
        
        CollationElementIterator iter = collator.getCollationElementIterator(pattern);

        int mask = getMask(strength);

        // See how many non-ignorable collation keys are in the text
        normLen = 0;
        int elem;
        while ((elem = iter.next()) != CollationElementIterator.NULLORDER)
        {
            if ((elem & mask) != 0) {
                normLen++;
            }
        }

        // Save them all
        valueList = new int[normLen];
        int expandLen = 0;
        iter.reset();
        
        for (int i = 0; i < normLen; i++)
        {
            elem = iter.next();

            if ((elem & mask) != 0) {
                valueList[i] = elem & mask;
                
            }
            // Keep track of whether there are any expanding-character
            // sequences that can result in one of the characters that's in
            // the pattern.  If there are, we have to reduce the shift
            // distances calculated below to account for it.
            expandLen += iter.getMaxExpansion(elem) - 1;
        }

        //
        // We need to remember the size of the composed and decomposed
        // versions of the string.  Standard Boyer-Moore shift calculations
        // can be wrong by an amount up to that difference, since a small
        // small number of characters in the pattern can map to a larger
        // number in the text being searched, or vice-versa.
        //
        int uniLen = pattern.length();
        maxLen = Math.max(normLen, uniLen);
        minLen = Math.min(normLen, uniLen) - expandLen;

        if (DEBUG) debug("normLen=" + normLen + ", expandLen=" + expandLen
                        + ", maxLen=" + maxLen + ", minLen=" + minLen);
        
        // Now initialize the shift tables
        //
        // NOTE: This is the most conservative way to build them.  If we had a way
        // of knowing that there were no expanding/contracting chars in the rules,
        // we could get rid of the "- 1" in the shiftTable calculations.
        // But all of the default collators have at least one expansion or
        // contraction, so it probably doesn't matter anyway.
        //
        for (int i = 0; i < MAX_TABLE; i++) {
            shiftTable[i] = backShiftTable[i] = minLen;
        }

        for (int i = 0; i < normLen-1; i++) {
            shiftTable[hash(valueList[i])] = Math.max(minLen - i - 1, 1);
        }
        shiftTable[hash(valueList[normLen-1])] = 1;
        
        for (int i = normLen - 1; i > 0; i--) {
            backShiftTable[hash(valueList[i])] = i;
        }
        backShiftTable[hash(valueList[0])] = 1;

        if (DEBUG) dumpTables();
    }

    /**
     * Method used by StringSearch to determine how far to the right to
     * shift the pattern during a Boyer-Moore search.  
     *
     * @param curValue  The current value in the target text
     * @param curIndex  The index in the pattern at which we failed to match
     *                  curValue in the target text.
     */
    private int getShift( int curValue, int curIndex ) {
        int shiftAmt = shiftTable[hash(curValue)];

        // if (minLen != maxLen) {
            int adjust = normLen - curIndex;
            // if (shiftAmt > adjust + 1) {
            if (adjust > 1 && shiftAmt >= adjust) {
                if (DEBUG) debug("getShift: adjusting by " + adjust);
                // shiftAmt -= adjust;
                shiftAmt -= adjust - 1;
            }
        // }
        return shiftAmt;
    }

    /**
     * Method used by StringSearch to determine how far to the left to
     * shift the pattern during a reverse Boyer-Moore search.  
     *
     * @param curValue  The current value in the target text
     * @param curIndex  The index in the pattern at which we failed to match
     *                  curValue in the target text.
     */
    private int getBackShift( int curValue, int curIndex ) {
        int shiftAmt = backShiftTable[hash(curValue)];

        // if (minLen != maxLen) {
            int adjust = curIndex;
            // int adjust = normLen - (minLen - curIndex);
            if (adjust > 1 && shiftAmt > adjust) {
                shiftAmt -= adjust - 1;
            }
            /*
            if (shiftAmt > adjust + 1) {
                if (DEBUG) debug("getBackShift: adjusting by " + adjust);
                shiftAmt -= adjust;
            }
            */
        // }
        return shiftAmt;
    }

    /**
     * Hash a collation element from its full size (32 bits) down into a
     * value that can be used as an index into the shift tables.  Right
     * now we do a modulus by the size of the hash table.
     *
     * TODO: At some point I should experiment to see whether a slightly
     * more complicated hash function gives us a better distribution
     * on multilingual text.  I doubt it will have much effect on
     * performance, though.
     */
    private static final int hash(int order) {
        return CollationElementIterator.primaryOrder(order) % MAX_TABLE;
    }


    //-------------------------------------------------------------------------
    // Debugging support...
    //-------------------------------------------------------------------------

    static private final boolean DEBUG = false;

    static void debug(String str) {
        System.out.println(str);
    }

    void dumpTables() {
        for (int i = 0; i < MAX_TABLE; i++) {
            if (shiftTable[i] != minLen) {
                debug("shift[" + Integer.toString(i,16) + "] = " + shiftTable[i]);
            }
        }
        for (int i = 0; i < MAX_TABLE; i++) {
            if (backShiftTable[i] != minLen) {
                debug("backShift[" + Integer.toString(i,16) + "] = " + backShiftTable[i]);
            }
        }
    }
 };
	/*
	*******************************************************************************
	* Copyright (C) 1996-2000, International Business Machines Corporation and *
	* others. All Rights Reserved. *
	*******************************************************************************
	*
	* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/SearchIterator.java,v $
	* $Date: 2002/04/03 19:13:56 $
	* $Revision: 1.6 $
	*
	*****************************************************************************************
	*/

	package com.ibm.icu.text;

	import java.text.BreakIterator;
	import java.text.CharacterIterator;

	/**
	* <code>SearchIterator</code> is an abstract base class that provides methods
	* to search for a pattern within a text string. Instances of
	* <code>SearchIterator</code> maintain a current position and scan over
	* the target text, returning the indices the pattern is matched
	* and the length of each match.
	* <p>
	* <code>SearchIterator</code> is an abstract base class that defines a
	* protocol for text searching. Subclasses provide concrete implementations of
	* various search algorithms. For example, {@link StringSearch}
	* implements language-sensitive pattern matching based on the comparison rules
	* defined in a {@link java.text.RuleBasedCollator RuleBasedCollator} object.
	* <p>
	* Internally, <code>SearchIterator</code> scans text using a
	* {@link CharacterIterator}, and is thus able to scan text held
	* by any object implementing that protocol. A <code>StringCharacterIterator</code>
	* is used to scan <code>String</code> objects passed to <code>setText</code>.
	* <p>
	* <code>SearchIterator</code> provides an API that is similar to that of
	* other text iteration classes such as <code>BreakIterator</code>. Using this
	* class, it is easy to scan through text looking for all occurances of a
	* given pattern. The following example uses a <code>StringSearch</code> object to
	* find all instances of "fox" in the target string. Any other subclass of
	* <code>SearchIterator</code> can be used in an identical manner.
	* <pre><code>
	* String target = "The quick brown fox jumped over the lazy fox";
	* String pattern = "fox";
	*
	* SearchIterator iter = new StringSearch(pattern, target);
	*
	* for (int pos = iter.first(); pos != SearchIterator.DONE; pos = iter.next()) {
	* System.out.println("Found match at " + pos +
	* ", length is " + iter.getMatchLength());
	* }
	* </code></pre>
	*
	* @see StringSearch
	*/
	public abstract class SearchIterator {
	/**
	* DONE is returned by previous() and next() after all valid
	* matches have been returned, and by first() and last() if
	* there are no matches at all.
	*/
	public static final int DONE = -1;

	/**
	* Private value indicating that the iterator is pointing
	* before the beginning of the target text.
	*/
	private static final int BEFORE = -2;

	/**
	* Return the first index at which the target text matches the search
	* pattern. The iterator is adjusted so that its current index
	* (as returned by {@link #getIndex}) is the match posisition if one was found
	* and <code>DONE</code> if one was not.
	*
	* @return The character index of the first match, or <code>DONE</code> if there
	* are no matches.
	*/
	final public int first() {
	setIndex(BEFORE);
	return next();
	}

	/**
	* Return the first index greater than <tt>pos</tt> at which the target
	* text matches the search pattern. The iterator is adjusted so that its current index
	* (as returned by {@link #getIndex}) is the match posisition if one was found
	* and <code>DONE</code> if one was not.
	*
	* @return The character index of the first match following <code>pos</code>,
	* or <tt>DONE</tt> if there are no matches.
	*/
	final public int following(int pos) {
	setIndex(pos);
	return next();
	}

	/**
	* Return the last index in the target text at which it matches
	* the search pattern and adjusts the iteration to point to that position.
	*
	* @return The index of the first match, or <tt>DONE</tt> if there
	* are no matches.
	*/
	final public int last() {
	setIndex(DONE);
	return previous();
	}

	/**
	* Return the first index less than <code>pos</code> at which the target
	* text matches the search pattern. The iterator is adjusted so that its current index
	* (as returned by {@link #getIndex}) is the match posisition if one was found
	* and <tt>DONE</tt> if one was not.
	*
	* @return The character index of the first match preceding <code>pos</code>,
	* or <code>DONE</code> if there are no matches.
	*/
	final public int preceding(int pos) {
	setIndex(pos);
	return previous();
	}

	/**
	* Return the index of the next point at which the text matches the
	* search pattern, starting from the current position.
	* @return The index of the next match after the current position,
	* or <code>DONE</code> if there are no more matches.
	*
	* @see #first
	*/
	public int next() {
	if (index == BEFORE){
	// Starting at the beginning of the text
	index = target.getBeginIndex();
	} else if (length > 0) {
	// Finding the next match after a previous one
	index += overlap ? 1 : length;
	}
	index -= 1;

	do {
	length = 0;
	index = handleNext(index + 1);
	} while (index != DONE && !isBreakUnit(index, index+length));

	return index;
	}

	/**
	* Return the index of the previous point at which the text matches
	* the search pattern, starting at the current position
	*
	* @return The index of the previous match before the current position,
	* or <code>DONE</code> if there are no more matches.
	*/
	public int previous() {
	if (index == DONE) {
	index = target.getEndIndex();
	} else if (length > 0) {
	// Finding the previous match before a following one
	index = overlap ? index + length - 1 : index;
	}
	index += 1;

	do {
	length = 0;
	index = handlePrev(index - 1);
	} while (index != DONE && !isBreakUnit(index, index+length));

	if (index == DONE) {
	index = BEFORE;
	}
	return getIndex();
	}



	/**
	* Return the current index in the text being searched.
	* If the iteration has gone past the end of the text
	* (or past the beginning for a backwards search),
	* {@link #DONE} is returned.
	*/
	public int getIndex() {
	return index == BEFORE ? DONE : index;
	}

	/**
	* Determines whether overlapping matches are returned. If this
	* property is <code>true</code>, matches that begin within the
	* boundry of the previous match are considered valid and will
	* be returned. For example, when searching for "abab" in the
	* target text "ababab", both offsets 0 and 2 will be returned
	* as valid matches if this property is <code>true</code>.
	* <p>
	* The default setting of this property is <tt>true</tt>
	*/
	public void setOverlapping(boolean allowOverlap) {
	overlap = allowOverlap;
	}

	/**
	* Determines whether overlapping matches are returned.
	*
	* @see #setOverlapping
	*/
	public boolean isOverlapping() {
	return overlap;
	}

	/**
	* Returns the length of text in the target which matches the search
	* pattern. This call returns a valid result only after a successful
	* call to {@link #first}, {@link #next}, {@link #previous}, or {@link #last}.
	* Just after construction, or after a searching method returns
	* <tt>DONE</tt>, this method will return 0.
	*
	* @return The length of the match in the target text, or 0 if there
	* is no match currently.
	*/
	public int getMatchLength() {
	return length;
	}

	/**
	* Set the BreakIterator that will be used to restrict the points
	* at which matches are detected.
	*
	* @param iterator A {@link java.text.BreakIterator BreakIterator}
	* that will be used to restrict the points
	* at which matches are detected. If a match is found, but the match's start
	* or end index is not a boundary as determined by
	* the <tt>BreakIterator</tt>, the match will be rejected and
	* another will be searched for.
	*
	* If this parameter is <tt>null</tt>, no break
	* detection is attempted.
	*
	* @see #getBreakIterator
	*/
	public void setBreakIterator(BreakIterator iterator) {
	breaker = iterator;
	if (breaker != null) {
	breaker.setText(target);
	}
	}

	/**
	* Returns the BreakIterator that is used to restrict the points
	* at which matches are detected. This will be the same object
	* that was passed to the constructor or to <code>setBreakIterator</code>.
	* Note that <tt>null</tt> is a legal value; it means that break
	* detection should not be attempted.
	*
	* @see #setBreakIterator
	*/
	public BreakIterator getBreakIterator() {
	return breaker;
	}

	/**
	* Set the target text which should be searched and resets the
	* iterator's position to point before the start of the target text.
	* This method is useful if you want to re-use an iterator to
	* search for the same pattern within a different body of text.
	*
	* @see #getTarget
	*/
	public void setTarget(CharacterIterator iterator) {
	target = iterator;
	if (breaker != null) {
	breaker.setText(target);
	}
	setIndex(BEFORE);
	}

	/**
	* Return the target text which is being searched
	*
	* @see #setTarget
	*/
	public CharacterIterator getTarget() {
	return target;
	}

	/**
	* Returns the text that was matched by the most recent call to
	* {@link #first}, {@link #next}, {@link #previous}, or {@link #last}.
	* If the iterator is not pointing at a valid match (e.g. just after
	* construction or after <tt>DONE</tt> has been returned, returns
	* an empty string.
	*/
	public String getMatchedText() {
	StringBuffer buffer = new StringBuffer();

	if (length > 0) {
	int i = 0;
	for (char c = target.setIndex(index); i < length; c = target.next(), i++)
	{
	buffer.append(c);
	}
	}
	return buffer.toString();
	}

	//-------------------------------------------------------------------
	// Protected interface for subclasses
	//-------------------------------------------------------------------

	/**
	* Constructor for use by subclasses.
	* <p>
	* @param target The target text to be searched. This is for internal
	* use by this class. Subclasses need to maintain their
	* own reference to or iterator over the target text
	* for use by their {@link #handleNext handleNext} and
	* {@link #handlePrev handlePrev} methods.
	*
	* @param breaker A {@link BreakIterator} that is used to restrict the points
	* at which matches are detected. If <tt>handleNext</tt> or
	* <tt>handlePrev</tt> finds a match, but the match's start
	* or end index is not a boundary as determined by
	* the <tt>BreakIterator</tt>, the match is rejected and
	* <tt>handleNext</tt> or <tt>handlePrev</tt> is called again.
	* If this parameter is <tt>null</tt>, no break
	* detection is attempted.
	*
	*/
	protected SearchIterator(CharacterIterator target, BreakIterator breaker)
	{
	this.target = target;

	if (breaker != null) {
	this.breaker = (BreakIterator)breaker.clone();
	this.breaker.setText(target);
	}

	index = target.getBeginIndex();
	length = 0;
	}

	/**
	* Abstract method which subclasses override to provide the mechanism
	* for finding the next match in the target text. This allows different
	* subclasses to provide different search algorithms.
	* <p>
	* If a match is found, the implementation should return the index at
	* which the match starts and should call {@link #setMatchLength setMatchLength}
	* with the number of characters in the target
	* text that make up the match. If no match is found, the method
	* should return DONE and should not call <tt>setMatchLength</tt>.
	* <p>
	* @param startAt The index in the target text at which the search
	* should start.
	*
	* @see #setMatchLength
	*/
	protected abstract int handleNext(int startAt);

	/**
	* Abstract method which subclasses override to provide the mechanism
	* for finding the previous match in the target text. This allows different
	* subclasses to provide different search algorithms.
	* <p>
	* If a match is found, the implementation should return the index at
	* which the match starts and should call {@link #setMatchLength setMatchLength}
	* with the number of characters in the target
	* text that make up the match. If no match is found, the method
	* should return DONE and should not call <tt>setMatchLength</tt>.
	* <p>
	* @param startAt The index in the target text at which the search
	* should start.
	*
	* @see #setMatchLength
	*/
	protected abstract int handlePrev(int startAt);

	/**
	* Sets the length of the currently matched string in the target text.
	* Subclasses' <code>handleNext</code> and <code>handlePrev</code>
	* methods should call this when they find a match in the target text.
	*/
	protected void setMatchLength(int length) {
	this.length = length;
	}

	//-------------------------------------------------------------------
	// Privates
	//

	/**
	* Internal method used by preceding and following. Sets the index
	* to point to the given position, and clears any state that's
	* affected.
	*/
	private void setIndex(int pos) {
	index = pos;
	length = 0;
	}

	/**
	* Determine whether the target text bounded by <code>start</code> and
	* <code>end</code> is one or more whole units of text as determined by
	* the current <code>BreakIterator</code>.
	*/
	private boolean isBreakUnit(int start, int end)
	{
	if (breaker == null) {
	return true;
	}
	boolean startBound = breaker.isBoundary(start);
	boolean endBound = (end == target.getEndIndex()) \|\| breaker.isBoundary(end);

	return startBound && endBound;
	}

	//-------------------------------------------------------------------------
	// Private data...
	//-------------------------------------------------------------------------
	private int index; // Current position in the target text
	private int length; // Length of matched text, or 0
	private boolean overlap = true; // Return overlapping matches?
	private CharacterIterator target; // Target text to be searched
	private BreakIterator breaker; // Break iterator to constrain matches
	};