/**********************************************************************
Copyright (c) 2002 IBM Corp. and others.
All rights reserved. This program and the accompanying materials
are made available under the terms of the Common Public License v0.5
which accompanies this distribution, and is available at
http://www.eclipse.org/legal/cpl-v05.html

Contributors:
IBM Corporation - initial API and implementation
**********************************************************************/
package net.sourceforge.phpdt.core.compiler;
/**
* Definition of a Java scanner, as returned by the <code>ToolFactory</code>.
* The scanner is responsible for tokenizing a given source, providing information about
* the nature of the token read, its positions and source equivalent.
* <p>
* When the scanner has finished tokenizing, it answers an EOF token
* (<code>ITerminalSymbols#TokenNameEOF</code>).
* <p>
* When encountering lexical errors, an <code>InvalidInputException</code> is thrown.
*
* @see net.sourceforge.phpdt.core.ToolFactory
* @see ITerminalSymbols
* @since 2.0
*/
public interface IScanner {
/**
* Answers the current identifier source, after unicode escape sequences have
* been translated into unicode characters.
* For example, if the original source was <code>\\u0061bc</code> then it will
* answer <code>abc</code>.
*
* @return the current identifier source, after unicode escape sequences have
* been translated into unicode characters
*/
char[] getCurrentTokenSource();
/**
* Answers the starting position of the current token inside the original source.
* This position is zero-based and inclusive. It corresponds to the position of the first character
* which is part of this token. If this character was a unicode escape sequence, it points at the first
* character of this sequence.
*
* @return the starting position of the current token inside the original source
*/
int getCurrentTokenStartPosition();
/**
* Answers the ending position of the current token inside the original source.
* This position is zero-based and inclusive. It corresponds to the position of the last character
* which is part of this token. If this character was a unicode escape sequence, it points at the last
* character of this sequence.
*
* @return the ending position of the current token inside the original source
*/
int getCurrentTokenEndPosition();
/**
* Answers the starting position of a given line number. This line has to have been encountered
* already in the tokenization process (i.e. it cannot be used to compute positions of lines beyond
* the current token). Once the entire source has been processed, it can be used without any limit.
* Line starting positions are zero-based, and start immediately after the previous line separator (if any).
*
* @param lineNumber the given line number
* @return the starting position of the given line number
*/
int getLineStart(int lineNumber);
/**
* Answers the ending position of a given line number. This line has to have been encountered
* already in the tokenization process (i.e. it cannot be used to compute positions of lines beyond
* the current token). Once the entire source has been processed, it can be used without any limit.
* Line ending positions are zero-based, and correspond to the last character of the line separator
* (in case of multi-character line separators).
*
* @param lineNumber the given line number
* @return the ending position of the given line number
*/
int getLineEnd(int lineNumber);
/**
* Answers an array of the ending positions of the lines encountered so far. Line ending positions
* are zero-based, and correspond to the last character of the line separator (in case of
* multi-character line separators).
*
* @return an array of the ending positions of the lines encountered so far
*/
int[] getLineEnds();
/**
* Answers a 1-based line number using the lines which have been encountered so far. If the position
* is located beyond the current scanned line, then the last line number will be answered.
*
* @param charPosition the given character position
* @return a 1-based line number using the lines which have been encountered so far
*/
int getLineNumber(int charPosition);
/**
* Reads the next token in the source, and answers its ID as specified by
* <code>ITerminalSymbols</code>.
* Note that the actual token ID values are subject to change if new keywords are added to the language
* (e.g. the 'assert' keyword in 1.4).
*
* @return the ID of the token read, as specified by <code>ITerminalSymbols</code>
* @throws InvalidInputException in case a lexical error was detected while reading the current token
*/
int getNextToken() throws InvalidInputException;
/**
* Answers the original source being processed (not a copy of it).
*
* @return the original source being processed
*/
char[] getSource();
/**
* Repositions the scanner on some portion of the original source. Once reaching the given
* <code>endPosition</code> it will answer EOF tokens (<code>ITerminalSymbols.TokenNameEOF</code>).
*
* @param startPosition the given start position
* @param endPosition the given end position
*/
void resetTo(int startPosition, int endPosition);
/**
* Sets the scanner source to process. By default, the scanner will consider starting at the beginning of the
* source until it reaches its end.
*
* @param source the given source
*/
void setSource(char[] source);
}