1 /*******************************************************************************
2 * Copyright (c) 2000, 2003 IBM Corporation and others.
3 * All rights reserved. This program and the accompanying materials
4 * are made available under the terms of the Common Public License v1.0
5 * which accompanies this distribution, and is available at
6 * http://www.eclipse.org/legal/cpl-v10.html
9 * IBM Corporation - initial API and implementation
10 *******************************************************************************/
12 package net.sourceforge.phpdt.internal.ui.text.spelling;
14 import java.text.BreakIterator;
15 import java.util.LinkedList;
16 import java.util.Locale;
18 import net.sourceforge.phpdt.corext.refactoring.nls.NLSElement;
19 import net.sourceforge.phpdt.internal.ui.text.phpdoc.IHtmlTagConstants;
20 import net.sourceforge.phpdt.internal.ui.text.phpdoc.IJavaDocTagConstants;
21 import net.sourceforge.phpdt.internal.ui.text.spelling.engine.DefaultSpellChecker;
22 import net.sourceforge.phpdt.internal.ui.text.spelling.engine.ISpellCheckIterator;
24 import org.eclipse.jface.text.IDocument;
25 import org.eclipse.jface.text.IRegion;
26 import org.eclipse.jface.text.TextUtilities;
29 * Iterator to spell-check javadoc comment regions.
// Spell-check iterator over a javadoc comment region. Constants such as JAVADOC_TAG_PREFIX and
// HTML_TAG_PREFIX are referenced unqualified further down; presumably they come from the two
// *TagConstants interfaces implemented here — confirm.
// NOTE(review): this listing carries embedded line numbers with gaps, so some statements and
// closing braces are not visible; confirm details against the complete file.
33 public class SpellCheckIterator implements ISpellCheckIterator, IJavaDocTagConstants, IHtmlTagConstants {
35 /** The content of the region */
36 private final String fContent;
38 /** The line delimiter */
39 private final String fDelimiter;
// Compared against tag names in isJavadocToken() and tested for null in nextToken(); no
// assignment other than this initializer is visible in this listing.
42 private String fLastToken= null;
// NOTE(review): fNext is used by getEnd(), nextToken() and skipTokens(), but its declaration is
// not among the visible lines (numbers 43-46 are absent) — confirm it exists in the full file.
47 /** The offset of the region */
48 private final int fOffset;
50 /** The predecessor break */
51 private int fPredecessor;
53 /** The previous break */
54 private int fPrevious= 0;
// Filled by the constructor with boxed Integer offsets and consumed from the front in nextToken().
56 /** The sentence breaks */
57 private final LinkedList fSentenceBreaks= new LinkedList();
59 /** Does the current word start a sentence? */
60 private boolean fStartsSentence= false;
62 /** The successor break */
63 private int fSuccessor;
65 /** The word iterator */
66 private final BreakIterator fWordIterator;
69 * Creates a new spell check iterator.
72 * The document containing the specified partition
74 * The region to spell-check
76 * The locale to use for spell-checking
// Sets up the iterator for one region of the document: remembers the region offset, creates a
// locale-specific word iterator, reads the region text and precomputes the sentence boundaries.
78 public SpellCheckIterator(final IDocument document, final IRegion region, final Locale locale) {
80 fOffset= region.getOffset();
81 fWordIterator= BreakIterator.getWordInstance(locale);
82 fDelimiter= TextUtilities.getDefaultLineDelimiter(document);
// Read the region's text. The declaration of 'content' and the opening 'try' are not visible
// in this listing.
87 content= document.get(region.getOffset(), region.getLength());
// A region whose text starts with the NLS tag prefix is replaced by empty content.
88 if (content.startsWith(NLSElement.TAG_PREFIX))
89 content= ""; //$NON-NLS-1$
// Any exception raised while reading also yields empty content instead of propagating.
91 } catch (Exception exception) {
92 content= ""; //$NON-NLS-1$
// NOTE(review): the assignment of fContent is not visible here (numbers 93-95 are absent) — confirm.
// Prime the word iterator: fPredecessor is the first boundary, fSuccessor the one after it.
96 fWordIterator.setText(content);
97 fPredecessor= fWordIterator.first();
98 fSuccessor= fWordIterator.next();
// Walk a separate sentence iterator over the same text and record each boundary, in order,
// starting from the iterator's current position.
100 final BreakIterator iterator= BreakIterator.getSentenceInstance(locale);
101 iterator.setText(content);
103 int offset= iterator.current();
104 while (offset != BreakIterator.DONE) {
// NOTE(review): new Integer(int) is deprecated since Java 9; Integer.valueOf(offset) is the
// usual replacement.
106 fSentenceBreaks.add(new Integer(offset));
107 offset= iterator.next();
112 * @see net.sourceforge.phpdt.internal.ui.text.spelling.engine.ISpellCheckIterator#getBegin()
// Returns the start of the current token: the region-relative fPrevious shifted by the region
// offset fOffset.
114 public final int getBegin() {
115 return fPrevious + fOffset;
119 * @see net.sourceforge.phpdt.internal.ui.text.spelling.engine.ISpellCheckIterator#getEnd()
// Returns fNext shifted by the region offset, minus one. nextToken() uses fNext as the exclusive
// end of substring(fPrevious, fNext), so this looks like the index of the token's last
// character — confirm against callers.
121 public final int getEnd() {
122 return fNext + fOffset - 1;
126 * @see java.util.Iterator#hasNext()
// True as long as the successor break has not reached BreakIterator.DONE.
128 public final boolean hasNext() {
129 return fSuccessor != BreakIterator.DONE;
133 * Does the specified token consist of at least one letter and digits only?
139 * @return <code>true</code> iff the token consists of digits and at
140 * least one letter only, <code>false</code> otherwise
// Inspects the characters of fContent in [begin, end) for letters and letter-or-digit status.
// The declaration of 'character' and the return statements are not visible in this listing.
142 protected final boolean isAlphaNumeric(final int begin, final int end) {
146 boolean letter= false;
147 for (int index= begin; index < end; index++) {
149 character= fContent.charAt(index);
// The statement guarded by this test is not visible; presumably it records that a letter was
// seen — confirm.
150 if (Character.isLetter(character))
// The statement guarded by this test is not visible; presumably it rejects the token on the
// first character that is neither letter nor digit — confirm.
153 if (!Character.isLetterOrDigit(character))
160 * Was the last token a Javadoc tag?
163 * The javadoc tags to check
164 * @return <code>true</code> iff the last token was a Javadoc tag, <code>false</code>
// Compares fLastToken with each entry of 'tags' using equals(); the comparison is skipped
// entirely while fLastToken is null. The return statements are not visible in this listing.
167 protected final boolean isJavadocToken(final String[] tags) {
169 if (fLastToken != null) {
171 for (int index= 0; index < tags.length; index++) {
173 if (fLastToken.equals(tags[index]))
181 * Is the current token a single letter token surrounded by whitespace
186 * @return <code>true</code> iff the token is a single letter token,
187 * <code>false</code> otherwise
// Tests the character at 'begin' together with its two neighbours in fContent.
189 protected final boolean isSingleLetter(final int begin) {
// The neighbours are only examined when 'begin' is neither the first nor the last index of
// fContent; the result for the other case is not visible in this listing.
191 if (begin > 0 && begin < fContent.length() - 1)
// Requires whitespace before, a letter at 'begin', and whitespace after.
192 return Character.isWhitespace(fContent.charAt(begin - 1)) && Character.isLetter(fContent.charAt(begin)) && Character.isWhitespace(fContent.charAt(begin + 1));
198 * Does the specified token look like a URL?
202 * @return <code>true</code> iff this token looks like a URL, <code>false</code>
// Checks whether fContent, read from offset 'begin', starts with any entry of
// DefaultSpellChecker.URL_PREFIXES. The return statements are not visible in this listing.
205 protected final boolean isUrlToken(final int begin) {
207 for (int index= 0; index < DefaultSpellChecker.URL_PREFIXES.length; index++) {
209 if (fContent.startsWith(DefaultSpellChecker.URL_PREFIXES[index], begin))
216 * Does the specified token consist of whitespace only?
222 * @return <code>true</code> iff the token consists of whitespace only,
223 * <code>false</code> otherwise
// Scans fContent over [begin, end) looking for a character that is not whitespace.
// The return statements are not visible in this listing.
225 protected final boolean isWhitespace(final int begin, final int end) {
227 for (int index= begin; index < end; index++) {
229 if (!Character.isWhitespace(fContent.charAt(index)))
236 * @see java.util.Iterator#next()
// Fetches a token through nextToken(); the loop keeps going while the token is null and word
// breaks remain. The loop body and the return statement are not visible in this listing.
238 public final Object next() {
240 String token= nextToken();
241 while (token == null && fSuccessor != BreakIterator.DONE)
250 * Advances the end index to the next word break.
// Shifts the break window forward: the old successor becomes the predecessor, then the word
// iterator supplies the new successor. Statements on the absent lines are not visible here.
252 protected final void nextBreak() {
255 fPredecessor= fSuccessor;
257 fSuccessor= fWordIterator.next();
261 * Returns the next sentence break.
263 * @return The next sentence break
// Returns the first recorded sentence break, unboxed, without removing it.
// LinkedList.getFirst() throws NoSuchElementException on an empty list; the calls visible in
// nextToken() check fSentenceBreaks.size() > 0 first.
265 protected final int nextSentence() {
266 return ((Integer) fSentenceBreaks.getFirst()).intValue();
270 * Determines the next token to be spell-checked.
272 * @return The next token to be spell-checked, or <code>null</code> iff
273 * the next token is not a candidate for spell-checking.
// Classifies the span of fContent between fPrevious and fNext and, for spell-check candidates,
// extracts it as the token. Several statements (the declaration of 'token', assignments to
// 'update' and fNext, break advancement, the return) are not visible in this listing.
275 protected String nextToken() {
// The span starts at the current predecessor break; the sentence flag is reset for every call.
279 fPrevious= fPredecessor;
280 fStartsSentence= false;
284 boolean update= false;
// Only non-empty spans are examined.
285 if (fNext - fPrevious > 0) {
// Case 1: the span begins with the javadoc tag prefix and more breaks remain.
287 if (fSuccessor != BreakIterator.DONE && fContent.charAt(fPrevious) == JAVADOC_TAG_PREFIX) {
// The prefix must be followed by a letter for the span to be taken as a token.
290 if (Character.isLetter(fContent.charAt(fPrevious + 1))) {
292 token= fContent.substring(fPrevious, fNext);
// Case 2: the span begins with the HTML tag prefix and the character at fNext is a letter or '/'.
296 } else if (fSuccessor != BreakIterator.DONE && fContent.charAt(fPrevious) == HTML_TAG_PREFIX && (Character.isLetter(fContent.charAt(fNext)) || fContent.charAt(fNext) == '/')) {
// Closing-tag prefix check; the guarded statement is not visible in this listing.
298 if (fContent.startsWith(HTML_CLOSE_PREFIX, fPrevious))
// Tests for the tag postfix at fNext; the guarded statements are not visible.
303 if (fSuccessor != BreakIterator.DONE && fContent.charAt(fNext) == HTML_TAG_POSTFIX) {
306 if (fSuccessor != BreakIterator.DONE) {
308 token= fContent.substring(fPrevious, fNext);
// Case 3: a span that is not pure whitespace and passes isAlphaNumeric().
311 } else if (!isWhitespace(fPrevious, fNext) && isAlphaNumeric(fPrevious, fNext)) {
// URL-like text is skipped up to the next space character.
313 if (isUrlToken(fPrevious))
314 skipTokens(fPrevious, ' ');
// After a parameter tag: the guarded statement is not visible in this listing.
315 else if (isJavadocToken(JAVADOC_PARAM_TAGS))
// After a reference tag: skip up to the first character of the line delimiter.
317 else if (isJavadocToken(JAVADOC_REFERENCE_TAGS)) {
319 skipTokens(fPrevious, fDelimiter.charAt(0));
// Otherwise spans longer than one character, or accepted by isSingleLetter(), become the token.
320 } else if (fNext - fPrevious > 1 || isSingleLetter(fPrevious))
321 token= fContent.substring(fPrevious, fNext);
// Sentence bookkeeping, only when 'update' was set (its assignments are not visible here)
// and sentence breaks remain.
327 if (update && fSentenceBreaks.size() > 0) {
329 if (fPrevious >= nextSentence()) {
// Discard every recorded sentence break that lies at or before the start of this span.
331 while (fSentenceBreaks.size() > 0 && fPrevious >= nextSentence())
332 fSentenceBreaks.removeFirst();
// Flag a sentence start when there was no previous token or this span produced a token.
334 fStartsSentence= (fLastToken == null) || (token != null);
341 * @see java.util.Iterator#remove()
// Removal is not supported by this iterator; every call throws UnsupportedOperationException.
343 public final void remove() {
344 throw new UnsupportedOperationException();
348 * Skip the tokens until the stop character is reached.
// Scans fContent for the 'stop' character and repositions the successor break accordingly.
// The declaration of 'end', the loop body and the statements between the visible lines are not
// shown in this listing.
355 protected final void skipTokens(final int begin, final char stop) {
// Loop while 'end' is inside the content and has not reached the stop character.
359 while (end < fContent.length() && fContent.charAt(end) != stop)
// Stop character found before the end of the content: resume at the word boundary after fNext.
362 if (end < fContent.length()) {
367 fSuccessor= fWordIterator.following(fNext);
// Presumably the else branch (not visible): no stop character, so iteration is marked finished.
369 fSuccessor= BreakIterator.DONE;
373 * @see net.sourceforge.phpdt.internal.ui.text.spelling.engine.ISpellCheckIterator#startsSentence()
// Returns the sentence-start flag; nextToken() resets it to false on every call and may set it
// during its sentence bookkeeping.
375 public final boolean startsSentence() {
376 return fStartsSentence;