/******************************************************************************* * Copyright (c) 2000, 2003 IBM Corporation and others. * All rights reserved. This program and the accompanying materials * are made available under the terms of the Common Public License v1.0 * which accompanies this distribution, and is available at * http://www.eclipse.org/legal/cpl-v10.html * * Contributors: * IBM Corporation - initial API and implementation *******************************************************************************/ package net.sourceforge.phpdt.internal.ui.text.spelling; import java.text.BreakIterator; import java.util.LinkedList; import java.util.Locale; import net.sourceforge.phpdt.corext.refactoring.nls.NLSElement; import net.sourceforge.phpdt.internal.ui.text.phpdoc.IHtmlTagConstants; import net.sourceforge.phpdt.internal.ui.text.phpdoc.IJavaDocTagConstants; import net.sourceforge.phpdt.internal.ui.text.spelling.engine.DefaultSpellChecker; import net.sourceforge.phpdt.internal.ui.text.spelling.engine.ISpellCheckIterator; import org.eclipse.jface.text.IDocument; import org.eclipse.jface.text.IRegion; import org.eclipse.jface.text.TextUtilities; /** * Iterator to spell-check javadoc comment regions. * * @since 3.0 */ public class SpellCheckIterator implements ISpellCheckIterator, IJavaDocTagConstants, IHtmlTagConstants { /** The content of the region */ private final String fContent; /** The line delimiter */ private final String fDelimiter; /** The last token */ private String fLastToken = null; /** The next break */ private int fNext = 1; /** The offset of the region */ private final int fOffset; /** The predecessor break */ private int fPredecessor; /** The previous break */ private int fPrevious = 0; /** The sentence breaks */ private final LinkedList fSentenceBreaks = new LinkedList(); /** Does the current word start a sentence? */ private boolean fStartsSentence = false; /** The successor break */ private int fSuccessor; /** The word iterator */ private final BreakIterator fWordIterator; /** * Creates a new spell check iterator. * * @param document * The document containing the specified partition * @param region * The region to spell-check * @param locale * The locale to use for spell-checking */ public SpellCheckIterator(final IDocument document, final IRegion region, final Locale locale) { fOffset = region.getOffset(); fWordIterator = BreakIterator.getWordInstance(locale); fDelimiter = TextUtilities.getDefaultLineDelimiter(document); String content; try { content = document.get(region.getOffset(), region.getLength()); if (content.startsWith(NLSElement.TAG_PREFIX)) content = ""; //$NON-NLS-1$ } catch (Exception exception) { content = ""; //$NON-NLS-1$ } fContent = content; fWordIterator.setText(content); fPredecessor = fWordIterator.first(); fSuccessor = fWordIterator.next(); final BreakIterator iterator = BreakIterator .getSentenceInstance(locale); iterator.setText(content); int offset = iterator.current(); while (offset != BreakIterator.DONE) { fSentenceBreaks.add(new Integer(offset)); offset = iterator.next(); } } /* * @see org.eclipse.spelling.done.ISpellCheckIterator#getBegin() */ public final int getBegin() { return fPrevious + fOffset; } /* * @see org.eclipse.spelling.done.ISpellCheckIterator#getEnd() */ public final int getEnd() { return fNext + fOffset - 1; } /* * @see java.util.Iterator#hasNext() */ public final boolean hasNext() { return fSuccessor != BreakIterator.DONE; } /** * Does the specified token consist of at least one letter and digits only? * * @param begin * The begin index * @param end * The end index * @return true iff the token consists of digits and at least * one letter only, false otherwise */ protected final boolean isAlphaNumeric(final int begin, final int end) { char character = 0; boolean letter = false; for (int index = begin; index < end; index++) { character = fContent.charAt(index); if (Character.isLetter(character)) letter = true; if (!Character.isLetterOrDigit(character)) return false; } return letter; } /** * Was the last token a Javadoc tag tag? * * @param tags * The javadoc tags to check * @return true iff the last token was a Javadoc tag, * false otherwise */ protected final boolean isJavadocToken(final String[] tags) { if (fLastToken != null) { for (int index = 0; index < tags.length; index++) { if (fLastToken.equals(tags[index])) return true; } } return false; } /** * Is the current token a single letter token surrounded by non-whitespace * characters? * * @param begin * The begin index * @return true iff the token is a single letter token, * false otherwise */ protected final boolean isSingleLetter(final int begin) { if (begin > 0 && begin < fContent.length() - 1) return Character.isWhitespace(fContent.charAt(begin - 1)) && Character.isLetter(fContent.charAt(begin)) && Character.isWhitespace(fContent.charAt(begin + 1)); return false; } /** * Does the specified token look like an URL? * * @param begin * The begin index * @return true iff this token look like an URL, * false otherwise */ protected final boolean isUrlToken(final int begin) { for (int index = 0; index < DefaultSpellChecker.URL_PREFIXES.length; index++) { if (fContent.startsWith(DefaultSpellChecker.URL_PREFIXES[index], begin)) return true; } return false; } /** * Does the specified token consist of whitespace only? * * @param begin * The begin index * @param end * The end index * @return true iff the token consists of whitespace only, * false otherwise */ protected final boolean isWhitespace(final int begin, final int end) { for (int index = begin; index < end; index++) { if (!Character.isWhitespace(fContent.charAt(index))) return false; } return true; } /* * @see java.util.Iterator#next() */ public final Object next() { String token = nextToken(); while (token == null && fSuccessor != BreakIterator.DONE) token = nextToken(); fLastToken = token; return token; } /** * Advances the end index to the next word break. */ protected final void nextBreak() { fNext = fSuccessor; fPredecessor = fSuccessor; fSuccessor = fWordIterator.next(); } /** * Returns the next sentence break. * * @return The next sentence break */ protected final int nextSentence() { return ((Integer) fSentenceBreaks.getFirst()).intValue(); } /** * Determines the next token to be spell-checked. * * @return The next token to be spell-checked, or null iff * the next token is not a candidate for spell-checking. */ protected String nextToken() { String token = null; fPrevious = fPredecessor; fStartsSentence = false; nextBreak(); boolean update = false; if (fNext - fPrevious > 0) { if (fSuccessor != BreakIterator.DONE && fContent.charAt(fPrevious) == JAVADOC_TAG_PREFIX) { nextBreak(); if (Character.isLetter(fContent.charAt(fPrevious + 1))) { update = true; token = fContent.substring(fPrevious, fNext); } else fPredecessor = fNext; } else if (fSuccessor != BreakIterator.DONE && fContent.charAt(fPrevious) == HTML_TAG_PREFIX && (Character.isLetter(fContent.charAt(fNext)) || fContent .charAt(fNext) == '/')) { if (fContent.startsWith(HTML_CLOSE_PREFIX, fPrevious)) nextBreak(); nextBreak(); if (fSuccessor != BreakIterator.DONE && fContent.charAt(fNext) == HTML_TAG_POSTFIX) { nextBreak(); if (fSuccessor != BreakIterator.DONE) { update = true; token = fContent.substring(fPrevious, fNext); } } } else if (!isWhitespace(fPrevious, fNext) && isAlphaNumeric(fPrevious, fNext)) { if (isUrlToken(fPrevious)) skipTokens(fPrevious, ' '); else if (isJavadocToken(JAVADOC_PARAM_TAGS)) fLastToken = null; else if (isJavadocToken(JAVADOC_REFERENCE_TAGS)) { fLastToken = null; skipTokens(fPrevious, fDelimiter.charAt(0)); } else if (fNext - fPrevious > 1 || isSingleLetter(fPrevious)) token = fContent.substring(fPrevious, fNext); update = true; } } if (update && fSentenceBreaks.size() > 0) { if (fPrevious >= nextSentence()) { while (fSentenceBreaks.size() > 0 && fPrevious >= nextSentence()) fSentenceBreaks.removeFirst(); fStartsSentence = (fLastToken == null) || (token != null); } } return token; } /* * @see java.util.Iterator#remove() */ public final void remove() { throw new UnsupportedOperationException(); } /** * Skip the tokens until the stop character is reached. * * @param begin * The begin index * @param stop * The stop character */ protected final void skipTokens(final int begin, final char stop) { int end = begin; while (end < fContent.length() && fContent.charAt(end) != stop) end++; if (end < fContent.length()) { fNext = end; fPredecessor = fNext; fSuccessor = fWordIterator.following(fNext); } else fSuccessor = BreakIterator.DONE; } /* * @see org.eclipse.spelling.done.ISpellCheckIterator#startsSentence() */ public final boolean startsSentence() { return fStartsSentence; } }