--- /dev/null
+/*******************************************************************************
+ * Copyright (c) 2000, 2003 IBM Corporation and others.
+ * All rights reserved. This program and the accompanying materials
+ * are made available under the terms of the Common Public License v1.0
+ * which accompanies this distribution, and is available at
+ * http://www.eclipse.org/legal/cpl-v10.html
+ *
+ * Contributors:
+ * IBM Corporation - initial API and implementation
+ *******************************************************************************/
+
+package net.sourceforge.phpdt.internal.ui.text.spelling;
+
+import java.text.BreakIterator;
+import java.util.LinkedList;
+import java.util.Locale;
+
+import net.sourceforge.phpdt.corext.refactoring.nls.NLSElement;
+import net.sourceforge.phpdt.internal.ui.text.phpdoc.IHtmlTagConstants;
+import net.sourceforge.phpdt.internal.ui.text.phpdoc.IJavaDocTagConstants;
+import net.sourceforge.phpdt.internal.ui.text.spelling.engine.DefaultSpellChecker;
+import net.sourceforge.phpdt.internal.ui.text.spelling.engine.ISpellCheckIterator;
+
+import org.eclipse.jface.text.IDocument;
+import org.eclipse.jface.text.IRegion;
+import org.eclipse.jface.text.TextUtilities;
+
+/**
+ * Iterator to spell-check javadoc comment regions.
+ * <p>
+ * The text of one document region is split at word boundaries with a
+ * {@link BreakIterator}. Each call to {@link #next()} yields the next
+ * segment that is a candidate for spell-checking; {@link #getBegin()} and
+ * {@link #getEnd()} report where that segment sits in the document, and
+ * {@link #startsSentence()} reports whether it begins a sentence.
+ * <p>
+ * All index fields below are relative to the start of the region content;
+ * {@link #fOffset} is added when positions are handed out to callers.
+ *
+ * @since 3.0
+ */
+public class SpellCheckIterator implements ISpellCheckIterator,
+ IJavaDocTagConstants, IHtmlTagConstants {
+
+ /**
+ * The text of the region. Empty if the region could not be read from the
+ * document or if its text starts with <code>NLSElement.TAG_PREFIX</code>.
+ */
+ private final String fContent;
+
+ /** The default line delimiter of the document the region belongs to */
+ private final String fDelimiter;
+
+ /**
+ * The token most recently returned by {@link #next()}, or
+ * <code>null</code>. Also reset to <code>null</code> once a javadoc tag
+ * stored here has been consumed by the token that follows it.
+ */
+ private String fLastToken = null;
+
+ /** End index (exclusive) of the current token within the content */
+ private int fNext = 1;
+
+ /** The offset of the region within the document */
+ private final int fOffset;
+
+ /** The word break at which the next token to be examined will start */
+ private int fPredecessor;
+
+ /** Start index (inclusive) of the current token within the content */
+ private int fPrevious = 0;
+
+ /**
+ * The sentence breaks that have not been passed yet, stored as
+ * <code>Integer</code> content indices in ascending order.
+ */
+ private final LinkedList fSentenceBreaks = new LinkedList();
+
+ /** Does the current word start a sentence? */
+ private boolean fStartsSentence = false;
+
+ /**
+ * The look-ahead word break following {@link #fNext}, or
+ * <code>BreakIterator.DONE</code> once the content is exhausted.
+ */
+ private int fSuccessor;
+
+ /** The word iterator that supplies the word breaks of the content */
+ private final BreakIterator fWordIterator;
+
+ /**
+ * Creates a new spell check iterator.
+ *
+ * @param document
+ * The document containing the specified partition
+ * @param region
+ * The region to spell-check
+ * @param locale
+ * The locale to use for spell-checking
+ */
+ public SpellCheckIterator(final IDocument document, final IRegion region,
+ final Locale locale) {
+
+ fOffset = region.getOffset();
+ fWordIterator = BreakIterator.getWordInstance(locale);
+ fDelimiter = TextUtilities.getDefaultLineDelimiter(document);
+
+ String content;
+ try {
+
+ content = document.get(region.getOffset(), region.getLength());
+ if (content.startsWith(NLSElement.TAG_PREFIX))
+ content = ""; //$NON-NLS-1$
+
+ } catch (Exception exception) {
+ content = ""; //$NON-NLS-1$
+ }
+ fContent = content;
+
+ fWordIterator.setText(content);
+ fPredecessor = fWordIterator.first();
+ fSuccessor = fWordIterator.next();
+
+ final BreakIterator iterator = BreakIterator
+ .getSentenceInstance(locale);
+ iterator.setText(content);
+
+ int offset = iterator.current();
+ while (offset != BreakIterator.DONE) {
+
+ fSentenceBreaks.add(new Integer(offset));
+ offset = iterator.next();
+ }
+ }
+
+ /**
+  * Returns the document offset at which the current token starts.
+  *
+  * @return The region offset plus the content-relative start index of the
+  *         current token
+  * @see ISpellCheckIterator#getBegin()
+  */
+ public final int getBegin() {
+     return fOffset + fPrevious;
+ }
+
+ /**
+  * Returns the document offset of the last character of the current token.
+  *
+  * @return The region offset plus the content-relative end index of the
+  *         current token, minus one so that the end is inclusive
+  * @see ISpellCheckIterator#getEnd()
+  */
+ public final int getEnd() {
+     final int exclusiveEnd = fOffset + fNext;
+     return exclusiveEnd - 1;
+ }
+
+ /**
+  * Tells whether the word iterator still has a break ahead. Note that the
+  * remaining segments are not guaranteed to contain a spell-check
+  * candidate, so a following {@link #next()} may still return
+  * <code>null</code>.
+  *
+  * @return <code>true</code> iff the look-ahead break is not
+  *         <code>BreakIterator.DONE</code>
+  * @see java.util.Iterator#hasNext()
+  */
+ public final boolean hasNext() {
+     return BreakIterator.DONE != fSuccessor;
+ }
+
+ /**
+  * Does the specified token consist of letters and digits only, with at
+  * least one letter among them?
+  *
+  * @param begin
+  *            The begin index (inclusive)
+  * @param end
+  *            The end index (exclusive)
+  * @return <code>true</code> iff every character in the range is a letter
+  *         or a digit and at least one of them is a letter,
+  *         <code>false</code> otherwise (including for an empty range)
+  */
+ protected final boolean isAlphaNumeric(final int begin, final int end) {
+
+     boolean sawLetter = false;
+
+     int position = begin;
+     while (position < end) {
+
+         final char current = fContent.charAt(position++);
+
+         // Any other kind of character disqualifies the token at once.
+         if (!Character.isLetterOrDigit(current))
+             return false;
+
+         if (Character.isLetter(current))
+             sawLetter = true;
+     }
+     return sawLetter;
+ }
+
+ /**
+ * Was the last token a Javadoc tag tag?
+ *
+ * @param tags
+ * The javadoc tags to check
+ * @return <code>true</code> iff the last token was a Javadoc tag,
+ * <code>false</code> otherwise
+ */
+ protected final boolean isJavadocToken(final String[] tags) {
+
+ if (fLastToken != null) {
+
+ for (int index = 0; index < tags.length; index++) {
+
+ if (fLastToken.equals(tags[index]))
+ return true;
+ }
+ }
+ return false;
+ }
+
+ /**
+  * Is the character at the specified index a letter that stands alone,
+  * i.e. with a whitespace character directly before and directly after it?
+  * <p>
+  * A letter at the very first or very last index of the content never
+  * qualifies, because one of its neighbours is missing.
+  *
+  * @param begin
+  *            The index of the character to test
+  * @return <code>true</code> iff the character is a letter enclosed by
+  *         whitespace on both sides, <code>false</code> otherwise
+  */
+ protected final boolean isSingleLetter(final int begin) {
+
+     // Both neighbours must exist.
+     final int lastIndex = fContent.length() - 1;
+     if (begin <= 0 || begin >= lastIndex)
+         return false;
+
+     final char before = fContent.charAt(begin - 1);
+     final char candidate = fContent.charAt(begin);
+     final char after = fContent.charAt(begin + 1);
+
+     return Character.isWhitespace(before) && Character.isLetter(candidate)
+             && Character.isWhitespace(after);
+ }
+
+ /**
+ * Does the specified token look like an URL?
+ *
+ * @param begin
+ * The begin index
+ * @return <code>true</code> iff this token look like an URL,
+ * <code>false</code> otherwise
+ */
+ protected final boolean isUrlToken(final int begin) {
+
+ for (int index = 0; index < DefaultSpellChecker.URL_PREFIXES.length; index++) {
+
+ if (fContent.startsWith(DefaultSpellChecker.URL_PREFIXES[index],
+ begin))
+ return true;
+ }
+ return false;
+ }
+
+ /**
+ * Does the specified token consist of whitespace only?
+ *
+ * @param begin
+ * The begin index
+ * @param end
+ * The end index
+ * @return <code>true</code> iff the token consists of whitespace only,
+ * <code>false</code> otherwise
+ */
+ protected final boolean isWhitespace(final int begin, final int end) {
+
+ for (int index = begin; index < end; index++) {
+
+ if (!Character.isWhitespace(fContent.charAt(index)))
+ return false;
+ }
+ return true;
+ }
+
+ /**
+  * Returns the next token that is a candidate for spell-checking, stepping
+  * over every segment that {@link #nextToken()} rejects. The returned
+  * token is remembered as the last token.
+  *
+  * @return The next candidate token as a <code>String</code>, or
+  *         <code>null</code> if the content ran out before another
+  *         candidate was found
+  * @see java.util.Iterator#next()
+  */
+ public final Object next() {
+
+     String candidate;
+     do {
+         candidate = nextToken();
+     } while (candidate == null && fSuccessor != BreakIterator.DONE);
+
+     fLastToken = candidate;
+
+     return candidate;
+ }
+
+ /**
+  * Advances by one word break: the former look-ahead break becomes both
+  * the end of the current token and the start of the following one, and a
+  * new look-ahead break is fetched from the word iterator.
+  */
+ protected final void nextBreak() {
+
+     final int reached = fSuccessor;
+
+     fNext = reached;
+     fPredecessor = reached;
+
+     fSuccessor = fWordIterator.next();
+ }
+
+ /**
+  * Returns the first sentence break that has not been passed yet. The
+  * list of sentence breaks must not be empty when this is called.
+  *
+  * @return The content-relative index of the next sentence break
+  */
+ protected final int nextSentence() {
+     final Integer head = (Integer) fSentenceBreaks.getFirst();
+     return head.intValue();
+ }
+
+ /**
+ * Determines the next token to be spell-checked.
+ * <p>
+ * Examines the segment between the pending start break and the next word
+ * break and classifies it as one of:
+ * <ul>
+ * <li>a javadoc tag (segment starts with <code>JAVADOC_TAG_PREFIX</code>),
+ * returned as a token so that the caller can remember it as the last
+ * token;</li>
+ * <li>an HTML tag (segment starts with <code>HTML_TAG_PREFIX</code>),
+ * returned as a whole including its closing
+ * <code>HTML_TAG_POSTFIX</code>;</li>
+ * <li>an alphanumeric word, which is returned unless it starts an URL,
+ * directly follows a javadoc parameter or reference tag, or is a single
+ * letter that is not enclosed by whitespace.</li>
+ * </ul>
+ * Any other segment is rejected. As a side effect the token boundaries,
+ * the list of pending sentence breaks and the starts-sentence flag are
+ * updated.
+ *
+ * @return The next token to be spell-checked, or <code>null</code> iff
+ * the next token is not a candidate for spell-checking.
+ */
+ protected String nextToken() {
+
+ String token = null;
+
+ // The new token starts where the previous step left off.
+ fPrevious = fPredecessor;
+ fStartsSentence = false;
+
+ nextBreak();
+
+ // Set when the segment was recognized; only then is the sentence
+ // bookkeeping at the end of this method performed.
+ boolean update = false;
+ if (fNext - fPrevious > 0) {
+
+ // The checks on fSuccessor make sure fNext is still inside the
+ // content, so that the charAt calls below stay in range.
+ if (fSuccessor != BreakIterator.DONE
+ && fContent.charAt(fPrevious) == JAVADOC_TAG_PREFIX) {
+
+ // Javadoc tag: extend the token by one more word segment so that
+ // it covers the prefix together with what follows it.
+ nextBreak();
+ if (Character.isLetter(fContent.charAt(fPrevious + 1))) {
+ update = true;
+ token = fContent.substring(fPrevious, fNext);
+ } else
+ // Prefix not followed by a letter: not a tag, continue after it.
+ fPredecessor = fNext;
+
+ } else if (fSuccessor != BreakIterator.DONE
+ && fContent.charAt(fPrevious) == HTML_TAG_PREFIX
+ && (Character.isLetter(fContent.charAt(fNext)) || fContent
+ .charAt(fNext) == '/')) {
+
+ // HTML tag: a closing tag needs one additional break to get past
+ // its prefix before the tag name is consumed.
+ if (fContent.startsWith(HTML_CLOSE_PREFIX, fPrevious))
+ nextBreak();
+
+ nextBreak();
+
+ // Only accept the tag if the postfix follows the name directly.
+ if (fSuccessor != BreakIterator.DONE
+ && fContent.charAt(fNext) == HTML_TAG_POSTFIX) {
+
+ nextBreak();
+ // A tag that ends exactly at the end of the content is dropped.
+ if (fSuccessor != BreakIterator.DONE) {
+ update = true;
+ token = fContent.substring(fPrevious, fNext);
+ }
+ }
+ } else if (!isWhitespace(fPrevious, fNext)
+ && isAlphaNumeric(fPrevious, fNext)) {
+
+ // Plain word. The order of the checks matters: each branch either
+ // skips the word or, in the last one, accepts it.
+ if (isUrlToken(fPrevious))
+ // Skip the rest of the URL (skipTokens is asked to stop at ' ').
+ skipTokens(fPrevious, ' ');
+ else if (isJavadocToken(JAVADOC_PARAM_TAGS))
+ // Word directly after a parameter tag: not checked. Forget the
+ // tag so that the words after this one are checked again.
+ fLastToken = null;
+ else if (isJavadocToken(JAVADOC_REFERENCE_TAGS)) {
+ // Word directly after a reference tag: skip up to the first
+ // character of the line delimiter.
+ fLastToken = null;
+ skipTokens(fPrevious, fDelimiter.charAt(0));
+ } else if (fNext - fPrevious > 1 || isSingleLetter(fPrevious))
+ // Longer words are always checked, one-letter words only when
+ // they are enclosed by whitespace.
+ token = fContent.substring(fPrevious, fNext);
+
+ update = true;
+ }
+ }
+
+ if (update && fSentenceBreaks.size() > 0) {
+
+ // The token starts at or behind a pending sentence break.
+ if (fPrevious >= nextSentence()) {
+
+ // Drop every sentence break that has been passed by now.
+ while (fSentenceBreaks.size() > 0
+ && fPrevious >= nextSentence())
+ fSentenceBreaks.removeFirst();
+
+ fStartsSentence = (fLastToken == null) || (token != null);
+ }
+ }
+ return token;
+ }
+
+ /**
+  * Not supported: tokens cannot be removed through this iterator.
+  *
+  * @throws UnsupportedOperationException
+  *             always
+  * @see java.util.Iterator#remove()
+  */
+ public final void remove() {
+     final UnsupportedOperationException unsupported = new UnsupportedOperationException();
+     throw unsupported;
+ }
+
+ /**
+  * Skips the tokens until the stop character is reached.
+  * <p>
+  * Scanning starts at <code>begin</code>. If a stop is found, the current
+  * token is made to end there and the word iterator is repositioned so
+  * that iteration resumes behind it. If the content ends first, the
+  * iterator is marked as exhausted.
+  * <p>
+  * When <code>stop</code> is the blank character, every whitespace
+  * character (tab, line break, ...) ends the skip as well. Otherwise text
+  * that is followed by a line break instead of a blank, such as an URL at
+  * the end of a line, would swallow the words of the following line up to
+  * the next blank and they would never be spell-checked.
+  *
+  * @param begin
+  *            The begin index
+  * @param stop
+  *            The stop character
+  */
+ protected final void skipTokens(final int begin, final char stop) {
+
+     final boolean stopAtAnyWhitespace = stop == ' ';
+     final int length = fContent.length();
+
+     int end = begin;
+     while (end < length) {
+
+         final char current = fContent.charAt(end);
+         if (current == stop
+                 || (stopAtAnyWhitespace && Character
+                         .isWhitespace(current)))
+             break;
+
+         end++;
+     }
+
+     if (end < length) {
+
+         // Resume word iteration right at the stop character.
+         fNext = end;
+         fPredecessor = fNext;
+
+         fSuccessor = fWordIterator.following(fNext);
+     } else
+         // No stop before the end of the content: nothing left to iterate.
+         fSuccessor = BreakIterator.DONE;
+ }
+
+ /**
+  * Tells whether the token determined last starts a sentence.
+  *
+  * @return The current value of the starts-sentence flag
+  * @see ISpellCheckIterator#startsSentence()
+  */
+ public final boolean startsSentence() {
+     final boolean sentenceStart = fStartsSentence;
+     return sentenceStart;
+ }
+}