net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/core/compiler/IScanner.java

   1 /**********************************************************************
   2  Copyright (c) 2002 IBM Corp. and others.
   3  All rights reserved. This program and the accompanying materials
   4  are made available under the terms of the Common Public License v0.5
   5  which accompanies this distribution, and is available at
   6  http://www.eclipse.org/legal/cpl-v05.html
   7
   8  Contributors:
   9  IBM Corporation - initial API and implementation
  10  **********************************************************************/
  11
  12 package net.sourceforge.phpdt.core.compiler;
  13
  14 import net.sourceforge.phpdt.core.compiler.ITerminalSymbols.TokenName;
  15
  16 /**
  17  * Definition of a Java scanner, as returned by the <code>ToolFactory</code>.
  18  * The scanner is responsible for tokenizing a given source, providing
  19  * information about the nature of the token read, its positions and source
  20  * equivalent.
  21  * <p>
  22  * When the scanner has finished tokenizing, it answers an EOF token
  23  * (<code>ITerminalSymbols#TokenNameEOF</code>).
  24  * <p>
  25  * When encountering lexical errors, an <code>InvalidInputException</code> is
  26  * thrown.
  27  *
  28  * @see net.sourceforge.phpdt.core.ToolFactory
  29  * @see ITerminalSymbols
  30  * @since 2.0
  31  */
  32 public interface IScanner {
  33
  34         /**
  35          * Answers the current identifier source, after unicode escape sequences
  36          * have been translated into unicode characters. For example, if the
  37          * original source was <code>\\u0061bc</code> then it will answer <code>abc</code>.
  38          *
  39          * @return the current identifier source, after unicode escape sequences
  40          *         have been translated into unicode characters
  41          */
  42         char[] getCurrentTokenSource();
  43
  44         /**
  45          * Answers the starting position of the current token inside the original
  46          * source. This position is zero-based and inclusive. It corresponds to the
  47          * position of the first character which is part of this token. If this
  48          * character was a unicode escape sequence, it points at the first character
  49          * of this sequence.
  50          *
  51          * @return the starting position of the current token inside the original
  52          *         source
  53          */
  54         int getCurrentTokenStartPosition();
  55
  56         /**
  57          * Answers the ending position of the current token inside the original
  58          * source. This position is zero-based and inclusive. It corresponds to the
  59          * position of the last character which is part of this token. If this
  60          * character was a unicode escape sequence, it points at the last character
  61          * of this sequence.
  62          *
  63          * @return the ending position of the current token inside the original
  64          *         source
  65          */
  66         int getCurrentTokenEndPosition();
  67
  68         /**
  69          * Answers the starting position of a given line number. This line has to
  70          * have been encountered already in the tokenization process (i.e. it cannot
  71          * be used to compute positions of lines beyond the current token). Once the
  72          * entire source has been processed, it can be used without any limit. Line
  73          * starting positions are zero-based, and start immediately after the
  74          * previous line separator (if any).
  75          *
  76          * @param lineNumber
  77          *            the given line number
  78          * @return the starting position of the given line number
  79          */
  80         int getLineStart(int lineNumber);
  81
  82         /**
  83          * Answers the ending position of a given line number. This line has to have
  84          * been encountered already in the tokenization process (i.e. it cannot be
  85          * used to compute positions of lines beyond the current token). Once the
  86          * entire source has been processed, it can be used without any limit. Line
  87          * ending positions are zero-based, and correspond to the last character of
  88          * the line separator (in case of multi-character line separators).
  89          *
  90          * @param lineNumber
  91          *            the given line number
  92          * @return the ending position of the given line number
  93          */
  94         int getLineEnd(int lineNumber);
  95
  96         /**
  97          * Answers an array of the ending positions of the lines encountered so far.
  98          * Line ending positions are zero-based, and correspond to the last
  99          * character of the line separator (in case of multi-character line
 100          * separators).
 101          *
 102          * @return an array of the ending positions of the lines encountered so far
 103          */
 104         int[] getLineEnds();
 105
 106         /**
 107          * Answers a 1-based line number using the lines which have been encountered
 108          * so far. If the position is located beyond the current scanned line, then
 109          * the last line number will be answered.
 110          *
 111          * @param charPosition
 112          *            the given character position
 113          * @return a 1-based line number using the lines which have been encountered
 114          *         so far
 115          */
 116         int getLineNumber(int charPosition);
 117
 118         /**
 119          * Reads the next token in the source, and answers its ID as specified by
 120          * <code>ITerminalSymbols</code>. Note that the actual token ID values are
 121          * subject to change if new keywords are added to the language (e.g. the
 122          * 'assert' keyword in 1.4).
 123          *
 124          * @return the ID of the token just read, as a <code>TokenName</code>
 125          * @throws InvalidInputException
 126          *             in case a lexical error was detected while reading the current token
 127          */
 128         TokenName getNextToken() throws InvalidInputException;
 129
 130         /**
 131          * Answers the original source being processed (not a copy of it).
 132          *
 133          * @return the original source being processed
 134          */
 135         char[] getSource();
 136
 137         /**
 138          * Repositions the scanner on some portion of the original source. Once
 139          * reaching the given <code>endPosition</code> it will answer EOF tokens (<code>ITerminalSymbols.TokenNameEOF</code>).
 140          *
 141          * @param startPosition
 142          *            the given start position
 143          * @param endPosition
 144          *            the given end position
 145          */
 146         void resetTo(int startPosition, int endPosition);
 147
 148         /**
 149          * Sets the scanner source to process. By default, the scanner will consider
 150          * starting at the beginning of the source until it reaches its end.
 151          *
 152          * @param source
 153          *            the given source
 154          */
 155         void setSource(char[] source);
 156 }