1 /**********************************************************************
2 Copyright (c) 2002 IBM Corp. and others.
3 All rights reserved. This program and the accompanying materials
4 are made available under the terms of the Common Public License v0.5
5 which accompanies this distribution, and is available at
6 http://www.eclipse.org/legal/cpl-v05.html
9 IBM Corporation - initial API and implementation
10 **********************************************************************/
12 package net.sourceforge.phpdt.core.compiler;
/**
 * Definition of a Java scanner, as returned by the <code>ToolFactory</code>.
 * The scanner is responsible for tokenizing a given source, providing information about
 * the nature of the token read, its positions and source equivalent.
 * <p>
 * When the scanner has finished tokenizing, it answers an EOF token
 * (<code>ITerminalSymbols#TokenNameEOF</code>).
 * <p>
 * When encountering lexical errors, an <code>InvalidInputException</code> is thrown.
 *
 * @see org.eclipse.jdt.core.ToolFactory
 * @see ITerminalSymbols
 */
29 public interface IScanner {
/**
 * Answers the source of the current identifier, after any unicode escape sequences
 * have been translated into the unicode characters they denote.
 * e.g. if the original source was <code>\\u0061bc</code> then it will answer <code>abc</code>.
 *
 * @return the current identifier source, after unicode escape sequences have
 *         been translated into unicode characters
 */
char[] getCurrentTokenSource();
/**
 * Answers the starting position of the current token inside the original source.
 * This position is zero-based and inclusive: it is the position of the first character
 * which is part of this token. If that character was a unicode escape sequence, the
 * position points at the first character of this sequence.
 *
 * @return the starting position of the current token inside the original source
 */
int getCurrentTokenStartPosition();
/**
 * Answers the ending position of the current token inside the original source.
 * This position is zero-based and inclusive: it is the position of the last character
 * which is part of this token. If that character was a unicode escape sequence, the
 * position points at the last character of this sequence.
 *
 * @return the ending position of the current token inside the original source
 */
int getCurrentTokenEndPosition();
/**
 * Answers the starting position of a given line number. The line must already have been
 * encountered in the tokenization process (i.e. this method cannot be used to compute
 * positions of lines beyond the current token). Once the entire source has been processed,
 * it can be used without any limit.
 * Line starting positions are zero-based, and start immediately after the previous line
 * separator (if any).
 * <p>
 * NOTE(review): the base of <code>lineNumber</code> is not stated here; presumably 1-based,
 * matching what <code>getLineNumber(int)</code> answers - confirm against the implementation.
 *
 * @param lineNumber the given line number
 * @return the starting position of a given line number
 */
int getLineStart(int lineNumber);
/**
 * Answers the ending position of a given line number. The line must already have been
 * encountered in the tokenization process (i.e. this method cannot be used to compute
 * positions of lines beyond the current token). Once the entire source has been processed,
 * it can be used without any limit.
 * Line ending positions are zero-based, and correspond to the last character of the line
 * separator (in case of multi-character line separators).
 * <p>
 * NOTE(review): the base of <code>lineNumber</code> is not stated here; presumably 1-based,
 * matching what <code>getLineNumber(int)</code> answers - confirm against the implementation.
 *
 * @param lineNumber the given line number
 * @return the ending position of a given line number
 */
int getLineEnd(int lineNumber);
/**
 * Answers an array of the ending positions of the lines encountered so far. Line ending positions
 * are zero-based, and correspond to the last character of the line separator (in case of
 * multi-character line separators).
 *
 * @return an array of the ending positions of the lines encountered so far
 */
/**
 * Answers a 1-based line number for the given character position, using the lines which
 * have been encountered so far. If the position is located beyond the current scanned line,
 * then the last line number will be answered.
 *
 * @param charPosition the given character position
 * @return a 1-based line number using the lines which have been encountered so far
 */
int getLineNumber(int charPosition);
/**
 * Reads the next token in the source, and answers its ID as specified by
 * <code>ITerminalSymbols</code>.
 * Note that the actual token ID values are subject to change if new keywords are added
 * to the language (e.g. the 'assert' keyword in 1.4).
 *
 * @return the ID of the token that was read, as specified by <code>ITerminalSymbols</code>
 * @throws InvalidInputException in case a lexical error was detected while reading the current token
 */
int getNextToken() throws InvalidInputException;
/**
 * Answers the original source being processed (not a copy of it).
 *
 * @return the original source being processed
 */
/**
 * Repositions the scanner on some portion of the original source. Once the given
 * <code>endPosition</code> is reached, the scanner will answer EOF tokens
 * (<code>ITerminalSymbols.TokenNameEOF</code>).
 *
 * @param startPosition the given start position
 * @param endPosition the given end position
 */
void resetTo(int startPosition, int endPosition);
/**
 * Sets the scanner source to process. By default, the scanner will consider starting at the
 * beginning of the source until it reaches its end.
 *
 * @param source the given source
 */
void setSource(char[] source);