1 /*******************************************************************************
2 * Copyright (c) 2000, 2004 IBM Corporation and others.
3 * All rights reserved. This program and the accompanying materials
4 * are made available under the terms of the Common Public License v1.0
5 * which accompanies this distribution, and is available at
6 * http://www.eclipse.org/legal/cpl-v10.html
9 * IBM Corporation - initial API and implementation
10 *******************************************************************************/
11 package net.sourceforge.phpdt.internal.ui.text;
13 import java.util.Arrays;
15 import net.sourceforge.phpdt.internal.compiler.parser.Scanner;
16 import net.sourceforge.phpeclipse.phpeditor.php.PHPDocumentPartitioner;
18 import org.eclipse.jface.text.Assert;
19 import org.eclipse.jface.text.BadLocationException;
20 import org.eclipse.jface.text.IDocument;
21 import org.eclipse.jface.text.IRegion;
22 import org.eclipse.jface.text.ITypedRegion;
23 import org.eclipse.jface.text.Region;
24 import org.eclipse.jface.text.TextUtilities;
27 * Utility methods for heuristic based Java manipulations in an incomplete Java
31 * An instance holds some internal position in the document and is therefore not
37 public class JavaHeuristicScanner implements Symbols {
39 * Returned by all methods when the requested position could not be found,
40 * or if a {@link BadLocationException} was thrown while scanning.
42 public static final int NOT_FOUND = -1;
45 * Special bound parameter that means either -1 (backward scanning) or
46 * <code>fDocument.getLength()</code> (forward scanning).
48 public static final int UNBOUND = -2;
50 /* character constants */
51 private static final char LBRACE = '{';
53 private static final char RBRACE = '}';
55 private static final char LPAREN = '(';
57 private static final char RPAREN = ')';
59 private static final char SEMICOLON = ';';
61 private static final char COLON = ':';
63 private static final char COMMA = ',';
65 private static final char LBRACKET = '[';
67 private static final char RBRACKET = ']';
69 private static final char QUESTIONMARK = '?';
71 private static final char EQUAL = '=';
74 * Specifies the stop condition, upon which the <code>scanXXX</code>
75 * methods will decide whether to keep scanning or not. This interface may
76 * be implemented by clients.
78 public interface StopCondition {
80 * Instructs the scanner to return the current position.
83 * the char at the current position
85 * the current position
87 * the iteration direction
88 * @return <code>true</code> if the stop condition is met.
90 boolean stop(char ch, int position, boolean forward);
94 * Stops upon a non-whitespace (as defined by
95 * {@link Character#isWhitespace(char)}) character.
97 private static class NonWhitespace implements StopCondition {
99 * @see net.sourceforge.phpdt.internal.ui.text.JavaHeuristicScanner.StopCondition#stop(char, int, boolean)
101 public boolean stop(char ch, int position, boolean forward) {
102 return !Character.isWhitespace(ch);
107 * Stops upon a non-whitespace character in the default partition.
111 private class NonWhitespaceDefaultPartition extends NonWhitespace {
113 * @see net.sourceforge.phpdt.internal.ui.text.JavaHeuristicScanner.StopCondition#stop(char, int, boolean)
115 public boolean stop(char ch, int position, boolean forward) {
116 return super.stop(ch, position, true)
117 && isDefaultPartition(position);
122 * Stops upon a non-java identifier (as defined by
123 * {@link Scanner#isPHPIdentifierPart(char)}) character.
125 private static class NonJavaIdentifierPart implements StopCondition {
127 * @see net.sourceforge.phpdt.internal.ui.text.JavaHeuristicScanner.StopCondition#stop(char, int, boolean)
129 public boolean stop(char ch, int position, boolean forward) {
130 return !Scanner.isPHPIdentifierPart(ch);
135 * Stops upon a non-java identifier character in the default partition.
137 * @see NonJavaIdentifierPart
139 private class NonJavaIdentifierPartDefaultPartition extends
140 NonJavaIdentifierPart {
142 * @see net.sourceforge.phpdt.internal.ui.text.JavaHeuristicScanner.StopCondition#stop(char, int, boolean)
144 public boolean stop(char ch, int position, boolean forward) {
145 return super.stop(ch, position, true)
146 || !isDefaultPartition(position);
151 * Stops upon a character in the default partition that matches the given
154 private class CharacterMatch implements StopCondition {
155 private final char[] fChars;
158 * Creates a new instance.
161 * the single character to match
163 public CharacterMatch(char ch) {
164 this(new char[] { ch });
168 * Creates a new instance.
171 * the chars to match.
173 public CharacterMatch(char[] chars) {
174 Assert.isNotNull(chars);
175 Assert.isTrue(chars.length > 0);
181 * @see net.sourceforge.phpdt.internal.ui.text.JavaHeuristicScanner.StopCondition#stop(char,
184 public boolean stop(char ch, int position, boolean forward) {
185 return Arrays.binarySearch(fChars, ch) >= 0
186 && isDefaultPartition(position);
191 * Acts like character match, but skips all scopes introduced by
192 * parenthesis, brackets, and braces.
194 protected class SkippingScopeMatch extends CharacterMatch {
195 private char fOpening, fClosing;
197 private int fDepth = 0;
200 * Creates a new instance.
203 * the single character to match
205 public SkippingScopeMatch(char ch) {
210 * Creates a new instance.
213 * the chars to match.
215 public SkippingScopeMatch(char[] chars) {
220 * @see net.sourceforge.phpdt.internal.ui.text.JavaHeuristicScanner.StopCondition#stop(char,
223 public boolean stop(char ch, int position, boolean forward) {
225 if (fDepth == 0 && super.stop(ch, position, true))
227 else if (ch == fOpening)
229 else if (ch == fClosing) {
235 } else if (fDepth == 0) {
279 /** The document being scanned. */
280 private IDocument fDocument;
282 /** The partitioning being used for scanning. */
283 private String fPartitioning;
285 /** The partition to scan in. */
286 private String fPartition;
288 /* internal scan state */
290 /** the most recently read character. */
293 /** the most recently read position. */
296 /* preset stop conditions */
// Stops at the first non-whitespace character that lies in the configured
// partition. Per-instance because the condition is an inner class that calls
// isDefaultPartition() on this scanner.
297 private final StopCondition fNonWSDefaultPart = new NonWhitespaceDefaultPartition();
// Stops at the first non-whitespace character, regardless of partition.
299 private final static StopCondition fNonWS = new NonWhitespace();
// Stops at the first character that is not a PHP identifier part, or that
// lies outside the configured partition.
301 private final StopCondition fNonIdent = new NonJavaIdentifierPartDefaultPartition();
304 * Creates a new instance.
307 * the document to scan
308 * @param partitioning
309 * the partitioning to use for scanning
311 * the partition to scan in
313 public JavaHeuristicScanner(IDocument document, String partitioning,
315 Assert.isNotNull(document);
316 Assert.isNotNull(partitioning);
317 Assert.isNotNull(partition);
318 fDocument = document;
319 fPartitioning = partitioning;
320 fPartition = partition;
325 * <code>this(document, IPHPPartitions.PHP_PARTITIONING, PHPDocumentPartitioner.PHP_SCRIPT_CODE)</code>.
328 * the document to scan.
330 public JavaHeuristicScanner(IDocument document) {
331 // this(document, IPHPPartitions.PHP_PARTITIONING,
332 // IDocument.DEFAULT_CONTENT_TYPE);
333 this(document, IPHPPartitions.PHP_PARTITIONING,
334 PHPDocumentPartitioner.PHP_SCRIPT_CODE);
338 * Returns the most recent internal scan position.
340 * @return the most recent internal scan position.
342 public int getPosition() {
347 * Returns the next token in forward direction, starting at
348 * <code>start</code>, and not extending further than <code>bound</code>.
349 * The return value is one of the constants defined in {@link Symbols}.
350 * After a call, {@link #getPosition()} will return the position just after
351 * the scanned token (i.e. the next position that will be scanned).
354 * the first character position in the document to consider
356 * the first position not to consider any more
357 * @return a constant from {@link Symbols} describing the next token
359 public int nextToken(int start, int bound) {
360 int pos = scanForward(start, bound, fNonWSDefaultPart);
361 if (pos == NOT_FOUND)
372 return TokenLBRACKET;
374 return TokenRBRACKET;
380 return TokenSEMICOLON;
384 return TokenQUESTIONMARK;
387 if (fDocument.getChar(fPos) == '>') {
391 } catch (BadLocationException e) {
396 if (fDocument.get(fPos, 4).equalsIgnoreCase("?php")) {
399 } else if (fDocument.getChar(fPos) == '?') {
403 } catch (BadLocationException e) {
408 if (Scanner.isPHPIdentifierPart(fChar)) {
409 // assume an ident or keyword
411 pos = scanForward(pos + 1, bound, fNonIdent);
412 if (pos == NOT_FOUND)
413 to = bound == UNBOUND ? fDocument.getLength() : bound;
417 String identOrKeyword;
419 identOrKeyword = fDocument.get(from, to - from);
420 } catch (BadLocationException e) {
424 return getToken(identOrKeyword);
427 // operators, number literals etc
433 * Returns the next token in backward direction, starting at
434 * <code>start</code>, and not extending further than <code>bound</code>.
435 * The return value is one of the constants defined in {@link Symbols}.
436 * After a call, {@link #getPosition()} will return the position just before
437 * the scanned token starts (i.e. the next position that will be scanned).
440 * the first character position in the document to consider
442 * the first position not to consider any more
443 * @return a constant from {@link Symbols} describing the previous token
445 public int previousToken(int start, int bound) {
446 int pos = scanBackward(start, bound, fNonWSDefaultPart);
447 if (pos == NOT_FOUND)
458 return TokenLBRACKET;
460 return TokenRBRACKET;
466 return TokenSEMICOLON;
472 return TokenQUESTIONMARK;
477 switch (fDocument.getChar(fPos)) {
485 } catch (BadLocationException e) {
490 if (Scanner.isPHPIdentifierPart(fChar)) {
491 // assume an ident or keyword
492 int from, to = pos + 1;
493 pos = scanBackward(pos - 1, bound, fNonIdent);
494 if (pos == NOT_FOUND)
495 from = bound == UNBOUND ? 0 : bound + 1;
499 String identOrKeyword;
501 identOrKeyword = fDocument.get(from, to - from);
502 } catch (BadLocationException e) {
506 return getToken(identOrKeyword);
509 // operators, number literals etc
516 * Returns one of the keyword constants or <code>TokenIDENT</code> for a
517 * scanned identifier.
520 * a scanned identifier
521 * @return one of the constants defined in {@link Symbols}
523 private int getToken(String s) {
526 switch (s.length()) {
528 if ("if".equals(s)) //$NON-NLS-1$
530 if ("do".equals(s)) //$NON-NLS-1$
534 if ("for".equals(s)) //$NON-NLS-1$
536 if ("try".equals(s)) //$NON-NLS-1$
538 if ("new".equals(s)) //$NON-NLS-1$
542 if ("case".equals(s)) //$NON-NLS-1$
544 if ("else".equals(s)) //$NON-NLS-1$
546 if ("goto".equals(s)) //$NON-NLS-1$
550 if ("break".equals(s)) //$NON-NLS-1$
552 if ("catch".equals(s)) //$NON-NLS-1$
554 if ("while".equals(s)) //$NON-NLS-1$
558 if ("return".equals(s)) //$NON-NLS-1$
560 if ("static".equals(s)) //$NON-NLS-1$
562 if ("switch".equals(s)) //$NON-NLS-1$
566 if ("default".equals(s)) //$NON-NLS-1$
568 if ("finally".equals(s)) //$NON-NLS-1$
572 if ("synchronized".equals(s)) //$NON-NLS-1$
573 return TokenSYNCHRONIZED;
580 * Returns the position of the closing peer character (forward search). Any
581 * scopes introduced by opening peers are skipped. All peers accounted for
582 * must reside in the default partition.
585 * Note that <code>start</code> must not point to the opening peer, but to
586 * the first character being searched.
592 * the opening peer character (e.g. '{')
594 * the closing peer character (e.g. '}')
595 * @return the matching peer character position, or <code>NOT_FOUND</code>
597 public int findClosingPeer(int start, final char openingPeer,
598 final char closingPeer) {
599 Assert.isNotNull(fDocument);
600 Assert.isTrue(start >= 0);
606 start = scanForward(start + 1, UNBOUND, new CharacterMatch(
607 new char[] { openingPeer, closingPeer }));
608 if (start == NOT_FOUND)
611 if (fDocument.getChar(start) == openingPeer)
620 } catch (BadLocationException e) {
626 * Returns the position of the opening peer character (backward search). Any
627 * scopes introduced by closing peers are skipped. All peers accounted for
628 * must reside in the default partition.
631 * Note that <code>start</code> must not point to the closing peer, but to
632 * the first character being searched.
638 * the opening peer character (e.g. '{')
640 * the closing peer character (e.g. '}')
641 * @return the matching peer character position, or <code>NOT_FOUND</code>
643 public int findOpeningPeer(int start, char openingPeer, char closingPeer) {
644 Assert.isTrue(start < fDocument.getLength());
650 start = scanBackward(start - 1, UNBOUND, new CharacterMatch(
651 new char[] { openingPeer, closingPeer }));
652 if (start == NOT_FOUND)
655 if (fDocument.getChar(start) == closingPeer)
664 } catch (BadLocationException e) {
670 * Computes the surrounding block around <code>offset</code>. The search
671 * is started at the beginning of <code>offset</code>, i.e. an opening
672 * brace at <code>offset</code> will not be part of the surrounding block,
673 * but a closing brace will.
676 * the offset for which the surrounding block is computed
677 * @return a region describing the surrounding block, or <code>null</code>
678 * if none can be found
680 public IRegion findSurroundingBlock(int offset) {
681 if (offset < 1 || offset >= fDocument.getLength())
684 int begin = findOpeningPeer(offset - 1, LBRACE, RBRACE);
685 int end = findClosingPeer(offset, LBRACE, RBRACE);
686 if (begin == NOT_FOUND || end == NOT_FOUND)
688 return new Region(begin, end + 1 - begin);
692 * Finds the smallest position in <code>fDocument</code> such that the
693 * position is >= <code>position</code> and < <code>bound</code>
694 * and <code>Character.isWhitespace(fDocument.getChar(pos))</code>
695 * evaluates to <code>false</code> and the position is in the default
699 * the first character position in <code>fDocument</code> to be
702 * the first position in <code>fDocument</code> to not consider
703 * any more, with <code>bound</code> > <code>position</code>,
704 * or <code>UNBOUND</code>
705 * @return the smallest position of a non-whitespace character in [<code>position</code>,
706 * <code>bound</code>) that resides in a Java partition, or
707 * <code>NOT_FOUND</code> if none can be found
709 public int findNonWhitespaceForward(int position, int bound) {
710 return scanForward(position, bound, fNonWSDefaultPart);
714 * Finds the smallest position in <code>fDocument</code> such that the
715 * position is >= <code>position</code> and < <code>bound</code>
716 * and <code>Character.isWhitespace(fDocument.getChar(pos))</code>
717 * evaluates to <code>false</code>.
720 * the first character position in <code>fDocument</code> to be
723 * the first position in <code>fDocument</code> to not consider
724 * any more, with <code>bound</code> > <code>position</code>,
725 * or <code>UNBOUND</code>
726 * @return the smallest position of a non-whitespace character in [<code>position</code>,
727 * <code>bound</code>), or <code>NOT_FOUND</code> if none can
730 public int findNonWhitespaceForwardInAnyPartition(int position, int bound) {
731 return scanForward(position, bound, fNonWS);
735 * Finds the highest position in <code>fDocument</code> such that the
736 * position is <= <code>position</code> and > <code>bound</code>
737 * and <code>Character.isWhitespace(fDocument.getChar(pos))</code>
738 * evaluates to <code>false</code> and the position is in the default
742 * the first character position in <code>fDocument</code> to be
745 * the first position in <code>fDocument</code> to not consider
746 * any more, with <code>bound</code> < <code>position</code>,
747 * or <code>UNBOUND</code>
748 * @return the highest position of a non-whitespace character in (<code>bound</code>,
749 * <code>position</code>] that resides in a Java partition, or
750 * <code>NOT_FOUND</code> if none can be found
752 public int findNonWhitespaceBackward(int position, int bound) {
753 return scanBackward(position, bound, fNonWSDefaultPart);
757 * Finds the lowest position <code>p</code> in <code>fDocument</code>
758 * such that <code>start</code> <= p < <code>bound</code> and
759 * <code>condition.stop(fDocument.getChar(p), p)</code> evaluates to
763 * the first character position in <code>fDocument</code> to be
766 * the first position in <code>fDocument</code> to not consider
767 * any more, with <code>bound</code> > <code>start</code>,
768 * or <code>UNBOUND</code>
770 * the <code>StopCondition</code> to check
771 * @return the lowest position in [<code>start</code>,
772 * <code>bound</code>) for which <code>condition</code> holds,
773 * or <code>NOT_FOUND</code> if none can be found
775 public int scanForward(int start, int bound, StopCondition condition) {
776 Assert.isTrue(start >= 0);
778 if (bound == UNBOUND)
779 bound = fDocument.getLength();
781 Assert.isTrue(bound <= fDocument.getLength());
785 while (fPos < bound) {
787 fChar = fDocument.getChar(fPos);
790 if (fPos < fDocument.getLength() - 1) {
791 if (fDocument.get(fPos - 1, 2).equalsIgnoreCase("?>")) {
797 if (condition.stop(fChar, fPos, true))
802 } catch (BadLocationException e) {
808 * Finds the lowest position in <code>fDocument</code> such that the
809 * position is >= <code>position</code> and < <code>bound</code>
810 * and <code>fDocument.getChar(position) == ch</code> evaluates to
811 * <code>true</code> and the position is in the default partition.
814 * the first character position in <code>fDocument</code> to be
817 * the first position in <code>fDocument</code> to not consider
818 * any more, with <code>bound</code> > <code>position</code>,
819 * or <code>UNBOUND</code>
821 * the <code>char</code> to search for
822 * @return the lowest position of <code>ch</code> in [<code>position</code>,
823 * <code>bound</code>) that resides in a Java partition, or
824 * <code>NOT_FOUND</code> if none can be found
826 public int scanForward(int position, int bound, char ch) {
827 return scanForward(position, bound, new CharacterMatch(ch));
831 * Finds the lowest position in <code>fDocument</code> such that the
832 * position is >= <code>position</code> and < <code>bound</code>
833 * and <code>fDocument.getChar(position) == ch</code> evaluates to
834 * <code>true</code> for at least one ch in <code>chars</code> and the
835 * position is in the default partition.
838 * the first character position in <code>fDocument</code> to be
841 * the first position in <code>fDocument</code> to not consider
842 * any more, with <code>bound</code> > <code>position</code>,
843 * or <code>UNBOUND</code>
845 * an array of <code>char</code> to search for
846 * @return the lowest position of one element in <code>chars</code> in [<code>position</code>,
847 * <code>bound</code>) that resides in a Java partition, or
848 * <code>NOT_FOUND</code> if none can be found
850 public int scanForward(int position, int bound, char[] chars) {
851 return scanForward(position, bound, new CharacterMatch(chars));
855 * Finds the highest position <code>p</code> in <code>fDocument</code>
856 * such that <code>bound</code> < <code>p</code> <=
857 * <code>start</code> and
858 * <code>condition.stop(fDocument.getChar(p), p)</code> evaluates to
862 * the first character position in <code>fDocument</code> to be
865 * the first position in <code>fDocument</code> to not consider
866 * any more, with <code>bound</code> < <code>start</code>,
867 * or <code>UNBOUND</code>
869 * the <code>StopCondition</code> to check
870 * @return the highest position in (<code>bound</code>,
871 * <code>start</code>] for which <code>condition</code> holds, or
872 * <code>NOT_FOUND</code> if none can be found
874 public int scanBackward(int start, int bound, StopCondition condition) {
875 if (bound == UNBOUND)
878 Assert.isTrue(bound >= -1);
879 Assert.isTrue(start < fDocument.getLength());
883 while (fPos > bound) {
885 fChar = fDocument.getChar(fPos);
887 if (fChar == 'p' || fChar == 'P') {
889 if (fDocument.get(fPos - 4, 5).equalsIgnoreCase("<?php")) {
894 } else if (fChar == '?') {
896 if (fDocument.get(fPos - 1, 2).equalsIgnoreCase("<?")) {
902 if (condition.stop(fChar, fPos, false))
907 } catch (BadLocationException e) {
913 * Finds the highest position in <code>fDocument</code> such that the
914 * position is <= <code>position</code> and > <code>bound</code>
915 * and <code>fDocument.getChar(position) == ch</code> evaluates to
916 * <code>true</code> and the position is in the default
917 * partition.
920 * the first character position in <code>fDocument</code> to be
923 * the first position in <code>fDocument</code> to not consider
924 * any more, with <code>bound</code> < <code>position</code>,
925 * or <code>UNBOUND</code>
927 * the <code>char</code> to search for
928 * @return the highest position of <code>ch</code> in (<code>bound</code>,
929 * <code>position</code>] that resides in a Java partition, or
930 * <code>NOT_FOUND</code> if none can be found
932 public int scanBackward(int position, int bound, char ch) {
933 return scanBackward(position, bound, new CharacterMatch(ch));
937 * Finds the highest position in <code>fDocument</code> such that the
938 * position is <= <code>position</code> and > <code>bound</code>
939 * and <code>fDocument.getChar(position) == ch</code> evaluates to
940 * <code>true</code> for at least one ch in <code>chars</code> and the
941 * position is in the default partition.
944 * the first character position in <code>fDocument</code> to be
947 * the first position in <code>fDocument</code> to not consider
948 * any more, with <code>bound</code> < <code>position</code>,
949 * or <code>UNBOUND</code>
951 * an array of <code>char</code> to search for
952 * @return the highest position of one element in <code>chars</code> in (<code>bound</code>,
953 * <code>position</code>] that resides in a Java partition, or
954 * <code>NOT_FOUND</code> if none can be found
956 public int scanBackward(int position, int bound, char[] chars) {
957 return scanBackward(position, bound, new CharacterMatch(chars));
961 * Checks whether <code>position</code> resides in a default (Java)
962 * partition of <code>fDocument</code>.
965 * the position to be checked
966 * @return <code>true</code> if <code>position</code> is in the default
967 * partition of <code>fDocument</code>, <code>false</code>
970 public boolean isDefaultPartition(int position) {
971 Assert.isTrue(position >= 0);
972 Assert.isTrue(position <= fDocument.getLength());
975 ITypedRegion region = TextUtilities.getPartition(fDocument,
976 fPartitioning, position, false);
977 return region.getType().equals(fPartition);
979 } catch (BadLocationException e) {
986 * Checks if the line seems to be an open condition not followed by a block
987 * (i.e. an if, while, or for statement with just one following statement,
988 * see example below).
996 * Algorithm: if the last non-WS, non-Comment code on the line is an if
997 * (condition), while (condition), for( expression), do, else, and there is
998 * no statement after that
1002 * the insert position of the new character
1004 * the lowest position to consider
1005 * @return <code>true</code> if the code is a conditional statement or
1006 * loop without a block, <code>false</code> otherwise
1008 public boolean isBracelessBlockStart(int position, int bound) {
1012 switch (previousToken(position, bound)) {
1017 position = findOpeningPeer(fPos, LPAREN, RPAREN);
1019 switch (previousToken(position - 1, bound)) {