X-Git-Url: http://secure.phpeclipse.com diff --git a/net.sourceforge.phpeclipse.ui/src/net/sourceforge/phpdt/internal/ui/text/spelling/SpellCheckIterator.java b/net.sourceforge.phpeclipse.ui/src/net/sourceforge/phpdt/internal/ui/text/spelling/SpellCheckIterator.java new file mode 100644 index 0000000..022cabf --- /dev/null +++ b/net.sourceforge.phpeclipse.ui/src/net/sourceforge/phpdt/internal/ui/text/spelling/SpellCheckIterator.java @@ -0,0 +1,391 @@ +/******************************************************************************* + * Copyright (c) 2000, 2003 IBM Corporation and others. + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Common Public License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/legal/cpl-v10.html + * + * Contributors: + * IBM Corporation - initial API and implementation + *******************************************************************************/ + +package net.sourceforge.phpdt.internal.ui.text.spelling; + +import java.text.BreakIterator; +import java.util.LinkedList; +import java.util.Locale; + +import net.sourceforge.phpdt.corext.refactoring.nls.NLSElement; +import net.sourceforge.phpdt.internal.ui.text.phpdoc.IHtmlTagConstants; +import net.sourceforge.phpdt.internal.ui.text.phpdoc.IJavaDocTagConstants; +import net.sourceforge.phpdt.internal.ui.text.spelling.engine.DefaultSpellChecker; +import net.sourceforge.phpdt.internal.ui.text.spelling.engine.ISpellCheckIterator; + +import org.eclipse.jface.text.IDocument; +import org.eclipse.jface.text.IRegion; +import org.eclipse.jface.text.TextUtilities; + +/** + * Iterator to spell-check javadoc comment regions. + * + * @since 3.0 + */ +public class SpellCheckIterator implements ISpellCheckIterator, + IJavaDocTagConstants, IHtmlTagConstants { + + /** The content of the region */ + private final String fContent; + + /** The line delimiter */ + private final String fDelimiter; + + /** The last token */ + private String fLastToken = null; + + /** The next break */ + private int fNext = 1; + + /** The offset of the region */ + private final int fOffset; + + /** The predecessor break */ + private int fPredecessor; + + /** The previous break */ + private int fPrevious = 0; + + /** The sentence breaks */ + private final LinkedList fSentenceBreaks = new LinkedList(); + + /** Does the current word start a sentence? */ + private boolean fStartsSentence = false; + + /** The successor break */ + private int fSuccessor; + + /** The word iterator */ + private final BreakIterator fWordIterator; + + /** + * Creates a new spell check iterator. + * + * @param document + * The document containing the specified partition + * @param region + * The region to spell-check + * @param locale + * The locale to use for spell-checking + */ + public SpellCheckIterator(final IDocument document, final IRegion region, + final Locale locale) { + + fOffset = region.getOffset(); + fWordIterator = BreakIterator.getWordInstance(locale); + fDelimiter = TextUtilities.getDefaultLineDelimiter(document); + + String content; + try { + + content = document.get(region.getOffset(), region.getLength()); + if (content.startsWith(NLSElement.TAG_PREFIX)) + content = ""; //$NON-NLS-1$ + + } catch (Exception exception) { + content = ""; //$NON-NLS-1$ + } + fContent = content; + + fWordIterator.setText(content); + fPredecessor = fWordIterator.first(); + fSuccessor = fWordIterator.next(); + + final BreakIterator iterator = BreakIterator + .getSentenceInstance(locale); + iterator.setText(content); + + int offset = iterator.current(); + while (offset != BreakIterator.DONE) { + + fSentenceBreaks.add(new Integer(offset)); + offset = iterator.next(); + } + } + + /* + * @see org.eclipse.spelling.done.ISpellCheckIterator#getBegin() + */ + public final int getBegin() { + return fPrevious + fOffset; + } + + /* + * @see org.eclipse.spelling.done.ISpellCheckIterator#getEnd() + */ + public final int getEnd() { + return fNext + fOffset - 1; + } + + /* + * @see java.util.Iterator#hasNext() + */ + public final boolean hasNext() { + return fSuccessor != BreakIterator.DONE; + } + + /** + * Does the specified token consist of at least one letter and digits only? + * + * @param begin + * The begin index + * @param end + * The end index + * @return true iff the token consists of digits and at least + * one letter only, false otherwise + */ + protected final boolean isAlphaNumeric(final int begin, final int end) { + + char character = 0; + + boolean letter = false; + for (int index = begin; index < end; index++) { + + character = fContent.charAt(index); + if (Character.isLetter(character)) + letter = true; + + if (!Character.isLetterOrDigit(character)) + return false; + } + return letter; + } + + /** + * Was the last token a Javadoc tag tag? + * + * @param tags + * The javadoc tags to check + * @return true iff the last token was a Javadoc tag, + * false otherwise + */ + protected final boolean isJavadocToken(final String[] tags) { + + if (fLastToken != null) { + + for (int index = 0; index < tags.length; index++) { + + if (fLastToken.equals(tags[index])) + return true; + } + } + return false; + } + + /** + * Is the current token a single letter token surrounded by non-whitespace + * characters? + * + * @param begin + * The begin index + * @return true iff the token is a single letter token, + * false otherwise + */ + protected final boolean isSingleLetter(final int begin) { + + if (begin > 0 && begin < fContent.length() - 1) + return Character.isWhitespace(fContent.charAt(begin - 1)) + && Character.isLetter(fContent.charAt(begin)) + && Character.isWhitespace(fContent.charAt(begin + 1)); + + return false; + } + + /** + * Does the specified token look like an URL? + * + * @param begin + * The begin index + * @return true iff this token look like an URL, + * false otherwise + */ + protected final boolean isUrlToken(final int begin) { + + for (int index = 0; index < DefaultSpellChecker.URL_PREFIXES.length; index++) { + + if (fContent.startsWith(DefaultSpellChecker.URL_PREFIXES[index], + begin)) + return true; + } + return false; + } + + /** + * Does the specified token consist of whitespace only? + * + * @param begin + * The begin index + * @param end + * The end index + * @return true iff the token consists of whitespace only, + * false otherwise + */ + protected final boolean isWhitespace(final int begin, final int end) { + + for (int index = begin; index < end; index++) { + + if (!Character.isWhitespace(fContent.charAt(index))) + return false; + } + return true; + } + + /* + * @see java.util.Iterator#next() + */ + public final Object next() { + + String token = nextToken(); + while (token == null && fSuccessor != BreakIterator.DONE) + token = nextToken(); + + fLastToken = token; + + return token; + } + + /** + * Advances the end index to the next word break. + */ + protected final void nextBreak() { + + fNext = fSuccessor; + fPredecessor = fSuccessor; + + fSuccessor = fWordIterator.next(); + } + + /** + * Returns the next sentence break. + * + * @return The next sentence break + */ + protected final int nextSentence() { + return ((Integer) fSentenceBreaks.getFirst()).intValue(); + } + + /** + * Determines the next token to be spell-checked. + * + * @return The next token to be spell-checked, or null iff + * the next token is not a candidate for spell-checking. + */ + protected String nextToken() { + + String token = null; + + fPrevious = fPredecessor; + fStartsSentence = false; + + nextBreak(); + + boolean update = false; + if (fNext - fPrevious > 0) { + + if (fSuccessor != BreakIterator.DONE + && fContent.charAt(fPrevious) == JAVADOC_TAG_PREFIX) { + + nextBreak(); + if (Character.isLetter(fContent.charAt(fPrevious + 1))) { + update = true; + token = fContent.substring(fPrevious, fNext); + } else + fPredecessor = fNext; + + } else if (fSuccessor != BreakIterator.DONE + && fContent.charAt(fPrevious) == HTML_TAG_PREFIX + && (Character.isLetter(fContent.charAt(fNext)) || fContent + .charAt(fNext) == '/')) { + + if (fContent.startsWith(HTML_CLOSE_PREFIX, fPrevious)) + nextBreak(); + + nextBreak(); + + if (fSuccessor != BreakIterator.DONE + && fContent.charAt(fNext) == HTML_TAG_POSTFIX) { + + nextBreak(); + if (fSuccessor != BreakIterator.DONE) { + update = true; + token = fContent.substring(fPrevious, fNext); + } + } + } else if (!isWhitespace(fPrevious, fNext) + && isAlphaNumeric(fPrevious, fNext)) { + + if (isUrlToken(fPrevious)) + skipTokens(fPrevious, ' '); + else if (isJavadocToken(JAVADOC_PARAM_TAGS)) + fLastToken = null; + else if (isJavadocToken(JAVADOC_REFERENCE_TAGS)) { + fLastToken = null; + skipTokens(fPrevious, fDelimiter.charAt(0)); + } else if (fNext - fPrevious > 1 || isSingleLetter(fPrevious)) + token = fContent.substring(fPrevious, fNext); + + update = true; + } + } + + if (update && fSentenceBreaks.size() > 0) { + + if (fPrevious >= nextSentence()) { + + while (fSentenceBreaks.size() > 0 + && fPrevious >= nextSentence()) + fSentenceBreaks.removeFirst(); + + fStartsSentence = (fLastToken == null) || (token != null); + } + } + return token; + } + + /* + * @see java.util.Iterator#remove() + */ + public final void remove() { + throw new UnsupportedOperationException(); + } + + /** + * Skip the tokens until the stop character is reached. + * + * @param begin + * The begin index + * @param stop + * The stop character + */ + protected final void skipTokens(final int begin, final char stop) { + + int end = begin; + + while (end < fContent.length() && fContent.charAt(end) != stop) + end++; + + if (end < fContent.length()) { + + fNext = end; + fPredecessor = fNext; + + fSuccessor = fWordIterator.following(fNext); + } else + fSuccessor = BreakIterator.DONE; + } + + /* + * @see org.eclipse.spelling.done.ISpellCheckIterator#startsSentence() + */ + public final boolean startsSentence() { + return fStartsSentence; + } +}