X-Git-Url: http://secure.phpeclipse.com diff --git a/net.sourceforge.phpeclipse.ui/src/net/sourceforge/phpdt/internal/ui/text/JavaBreakIterator.java b/net.sourceforge.phpeclipse.ui/src/net/sourceforge/phpdt/internal/ui/text/JavaBreakIterator.java new file mode 100644 index 0000000..f80d88a --- /dev/null +++ b/net.sourceforge.phpeclipse.ui/src/net/sourceforge/phpdt/internal/ui/text/JavaBreakIterator.java @@ -0,0 +1,444 @@ +/******************************************************************************* + * Copyright (c) 2000, 2004 IBM Corporation and others. + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Common Public License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/legal/cpl-v10.html + * + * Contributors: + * IBM Corporation - initial API and implementation + *******************************************************************************/ +package net.sourceforge.phpdt.internal.ui.text; + +import java.text.BreakIterator; +import java.text.CharacterIterator; + +import net.sourceforge.phpdt.internal.compiler.parser.Scanner; + +import org.eclipse.jface.text.Assert; + +/** + * A java break iterator. It returns all breaks, including before and after + * whitespace, and it returns all camelcase breaks. + *

+ * A line break may be any of "\n", "\r", "\r\n", "\n\r". + *

+ * + * @since 3.0 + */ +public class JavaBreakIterator extends BreakIterator { + + /** + * A run of common characters. + */ + protected static abstract class Run { + /** The length of this run. */ + protected int length; + + public Run() { + init(); + } + + /** + * Returns true if this run consumes ch, + * false otherwise. If true is returned, + * the length of the receiver is adjusted accordingly. + * + * @param ch + * the character to test + * @return true if ch was consumed + */ + protected boolean consume(char ch) { + if (isValid(ch)) { + length++; + return true; + } + return false; + } + + /** + * Whether this run accepts that character; does not update state. + * Called from the default implementation of consume. + * + * @param ch + * the character to test + * @return true if ch is accepted + */ + protected abstract boolean isValid(char ch); + + /** + * Resets this run to the initial state. + */ + protected void init() { + length = 0; + } + } + + static final class Whitespace extends Run { + protected boolean isValid(char ch) { + return Character.isWhitespace(ch) && ch != '\n' && ch != '\r'; + } + } + + static final class LineDelimiter extends Run { + /** State: INIT -> delimiter -> EXIT. */ + private char fState; + + private static final char INIT = '\0'; + + private static final char EXIT = '\1'; + + /* + * @see net.sourceforge.phpdt.internal.ui.text.JavaBreakIterator.Run#init() + */ + protected void init() { + super.init(); + fState = INIT; + } + + /* + * @see net.sourceforge.phpdt.internal.ui.text.JavaBreakIterator.Run#consume(char) + */ + protected boolean consume(char ch) { + if (!isValid(ch) || fState == EXIT) + return false; + + if (fState == INIT) { + fState = ch; + length++; + return true; + } else if (fState != ch) { + fState = EXIT; + length++; + return true; + } else { + return false; + } + } + + protected boolean isValid(char ch) { + return ch == '\n' || ch == '\r'; + } + } + + static final class Identifier extends Run { + /* + * @see net.sourceforge.phpdt.internal.ui.text.JavaBreakIterator.Run#isValid(char) + */ + protected boolean isValid(char ch) { + return Scanner.isPHPIdentifierPart(ch); + } + } + + static final class CamelCaseIdentifier extends Run { + /* states */ + private static final int S_INIT = 0; + + private static final int S_LOWER = 1; + + private static final int S_ONE_CAP = 2; + + private static final int S_ALL_CAPS = 3; + + private static final int S_EXIT = 4; + + private static final int S_EXIT_MINUS_ONE = 5; + + /* character types */ + private static final int K_INVALID = 0; + + private static final int K_LOWER = 1; + + private static final int K_UPPER = 2; + + private static final int K_OTHER = 3; + + private int fState; + + private final static int[][] MATRIX = new int[][] { + // K_INVALID, K_LOWER, K_UPPER, K_OTHER + { S_EXIT, S_LOWER, S_ONE_CAP, S_LOWER }, // S_INIT + { S_EXIT, S_LOWER, S_EXIT, S_LOWER }, // S_LOWER + { S_EXIT, S_LOWER, S_ALL_CAPS, S_LOWER }, // S_ONE_CAP + { S_EXIT, S_EXIT_MINUS_ONE, S_ALL_CAPS, S_LOWER }, // S_ALL_CAPS + }; + + /* + * @see net.sourceforge.phpdt.internal.ui.text.JavaBreakIterator.Run#init() + */ + protected void init() { + super.init(); + fState = S_INIT; + } + + /* + * @see net.sourceforge.phpdt.internal.ui.text.JavaBreakIterator.Run#consumes(char) + */ + protected boolean consume(char ch) { + int kind = getKind(ch); + fState = MATRIX[fState][kind]; + switch (fState) { + case S_LOWER: + case S_ONE_CAP: + case S_ALL_CAPS: + length++; + return true; + case S_EXIT: + return false; + case S_EXIT_MINUS_ONE: + length--; + return false; + default: + Assert.isTrue(false); + return false; + } + } + + /** + * Determines the kind of a character. + * + * @param ch + * the character to test + */ + private int getKind(char ch) { + if (Character.isUpperCase(ch)) + return K_UPPER; + if (Character.isLowerCase(ch)) + return K_LOWER; + if (Scanner.isPHPIdentifierPart(ch)) // _, digits... + return K_OTHER; + return K_INVALID; + } + + /* + * @see net.sourceforge.phpdt.internal.ui.text.JavaBreakIterator.Run#isValid(char) + */ + protected boolean isValid(char ch) { + return Scanner.isPHPIdentifierPart(ch); + } + } + + static final class Other extends Run { + /* + * @see net.sourceforge.phpdt.internal.ui.text.JavaBreakIterator.Run#isValid(char) + */ + protected boolean isValid(char ch) { + return !Character.isWhitespace(ch) + && !Scanner.isPHPIdentifierPart(ch); + } + } + + private static final Run WHITESPACE = new Whitespace(); + + private static final Run DELIMITER = new LineDelimiter(); + + private static final Run CAMELCASE = new CamelCaseIdentifier(); // new + // Identifier(); + + private static final Run OTHER = new Other(); + + /** The platform break iterator (word instance) used as a base. */ + protected final BreakIterator fIterator; + + /** The text we operate on. */ + protected CharSequence fText; + + /** our current position for the stateful methods. */ + private int fIndex; + + /** + * Creates a new break iterator. + */ + public JavaBreakIterator() { + fIterator = BreakIterator.getWordInstance(); + fIndex = fIterator.current(); + } + + /* + * @see java.text.BreakIterator#current() + */ + public int current() { + return fIndex; + } + + /* + * @see java.text.BreakIterator#first() + */ + public int first() { + fIndex = fIterator.first(); + return fIndex; + } + + /* + * @see java.text.BreakIterator#following(int) + */ + public int following(int offset) { + // work around too eager IAEs in standard impl + if (offset == getText().getEndIndex()) + return DONE; + + int next = fIterator.following(offset); + if (next == DONE) + return DONE; + + // TODO deal with complex script word boundaries + // Math.min(offset + run.length, next) does not work + // since wordinstance considers _ as boundaries + // seems to work fine, however + Run run = consumeRun(offset); + return offset + run.length; + + } + + /** + * Consumes a run of characters at the limits of which we introduce a break. + * + * @param offset + * the offset to start at + * @return the run that was consumed + */ + private Run consumeRun(int offset) { + // assert offset < length + + char ch = fText.charAt(offset); + int length = fText.length(); + Run run = getRun(ch); + while (run.consume(ch) && offset < length - 1) { + offset++; + ch = fText.charAt(offset); + } + + return run; + } + + /** + * Retunrs a run based on a character. + * + * @param ch + * the character to test + * @return the correct character given ch + */ + private Run getRun(char ch) { + Run run; + if (WHITESPACE.isValid(ch)) + run = WHITESPACE; + else if (DELIMITER.isValid(ch)) + run = DELIMITER; + else if (CAMELCASE.isValid(ch)) + run = CAMELCASE; + else if (OTHER.isValid(ch)) + run = OTHER; + else { + Assert.isTrue(false); + return null; + } + + run.init(); + return run; + } + + /* + * @see java.text.BreakIterator#getText() + */ + public CharacterIterator getText() { + return fIterator.getText(); + } + + /* + * @see java.text.BreakIterator#isBoundary(int) + */ + public boolean isBoundary(int offset) { + if (offset == getText().getBeginIndex()) + return true; + else + return following(offset - 1) == offset; + } + + /* + * @see java.text.BreakIterator#last() + */ + public int last() { + fIndex = fIterator.last(); + return fIndex; + } + + /* + * @see java.text.BreakIterator#next() + */ + public int next() { + fIndex = following(fIndex); + return fIndex; + } + + /* + * @see java.text.BreakIterator#next(int) + */ + public int next(int n) { + return fIterator.next(n); + } + + /* + * @see java.text.BreakIterator#preceding(int) + */ + public int preceding(int offset) { + if (offset == getText().getBeginIndex()) + return DONE; + + if (isBoundary(offset - 1)) + return offset - 1; + + int previous = offset - 1; + do { + previous = fIterator.preceding(previous); + } while (!isBoundary(previous)); + + int last = DONE; + while (previous < offset) { + last = previous; + previous = following(previous); + } + + return last; + } + + /* + * @see java.text.BreakIterator#previous() + */ + public int previous() { + fIndex = preceding(fIndex); + return fIndex; + } + + /* + * @see java.text.BreakIterator#setText(java.lang.String) + */ + public void setText(String newText) { + setText((CharSequence) newText); + } + + /** + * Creates a break iterator given a char sequence. + * + * @param newText + * the new text + */ + public void setText(CharSequence newText) { + fText = newText; + fIterator.setText(new SequenceCharacterIterator(newText)); + first(); + } + + /* + * @see java.text.BreakIterator#setText(java.text.CharacterIterator) + */ + public void setText(CharacterIterator newText) { + if (newText instanceof CharSequence) { + fText = (CharSequence) newText; + fIterator.setText(newText); + first(); + } else { + throw new UnsupportedOperationException( + "CharacterIterator not supported"); //$NON-NLS-1$ + } + } +}