net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/core/compiler/IScanner.java

   1 /**********************************************************************
   2 Copyright (c) 2002 IBM Corp. and others.
   3 All rights reserved.   This program and the accompanying materials
   4 are made available under the terms of the Common Public License v0.5
   5 which accompanies this distribution, and is available at
   6 http://www.eclipse.org/legal/cpl-v05.html
   7
   8 Contributors:
   9      IBM Corporation - initial API and implementation
  10 **********************************************************************/
  11
  12 package net.sourceforge.phpdt.core.compiler;
  13
  14
  15  /**
  16   * Definition of a Java scanner, as returned by the <code>ToolFactory</code>.
  17   * The scanner is responsible for tokenizing a given source, providing information about
  18   * the nature of the token read, its positions and source equivalent.
  19   *
  20   * When the scanner has finished tokenizing, it answers an EOF token
  21   * (<code>ITerminalSymbols#TokenNameEOF</code>).
  22   *
  23   * When encountering lexical errors, an <code>InvalidInputException</code> is thrown.
  24   *
  25   * @see org.eclipse.jdt.core.ToolFactory
  26   * @see ITerminalSymbols
  27   * @since 2.0
  28   */
  29 public interface IScanner {
  30
  31         /**
  32          * Answers the current identifier source, after unicode escape sequences have
  33          * been translated into unicode characters.
  34          * For example, if the original source was <code>\\u0061bc</code> then it will answer <code>abc</code>.
  35          *
  36          * @return the current identifier source, after unicode escape sequences have
  37          * been translated into unicode characters
  38          */
  39         char[] getCurrentTokenSource();
  40
  41         /**
  42          * Answers the starting position of the current token inside the original source.
  43          * This position is zero-based and inclusive. It corresponds to the position of the first character
  44          * which is part of this token. If this character was a unicode escape sequence, it points at the first
  45          * character of this sequence.
  46          *
  47          * @return the starting position of the current token inside the original source
  48          */
  49         int getCurrentTokenStartPosition();
  50
  51         /**
  52          * Answers the ending position of the current token inside the original source.
  53          * This position is zero-based and inclusive. It corresponds to the position of the last character
  54          * which is part of this token. If this character was a unicode escape sequence, it points at the last
  55          * character of this sequence.
  56          *
  57          * @return the ending position of the current token inside the original source
  58          */
  59         int getCurrentTokenEndPosition();
  60
  61         /**
  62          * Answers the starting position of a given line number. This line has to have been encountered
  63          * already in the tokenization process (i.e. it cannot be used to compute positions of lines beyond
  64          * the current token). Once the entire source has been processed, it can be used without any limit.
  65          * Line starting positions are zero-based, and start immediately after the previous line separator (if any).
  66          *
  67          * @param lineNumber the given line number
  68          * @return the starting position of a given line number
  69          */
  70         int getLineStart(int lineNumber);
  71
  72         /**
  73          * Answers the ending position of a given line number. This line has to have been encountered
  74          * already in the tokenization process (i.e. it cannot be used to compute positions of lines beyond
  75          * the current token). Once the entire source has been processed, it can be used without any limit.
  76          * Line ending positions are zero-based, and correspond to the last character of the line separator
  77          * (in case of multi-character line separators).
  78          *
  79          * @param lineNumber the given line number
  80          * @return the ending position of a given line number
  81          */
  82         int getLineEnd(int lineNumber);
  83
  84         /**
  85          * Answers an array of the ending positions of the lines encountered so far. Line ending positions
  86          * are zero-based, and correspond to the last character of the line separator (in case of
  87          * multi-character line separators).
  88          *
  89          * @return an array of the ending positions of the lines encountered so far
  90          */
  91         int[] getLineEnds();
  92
  93         /**
  94          * Answers a 1-based line number using the lines which have been encountered so far. If the position
  95          * is located beyond the current scanned line, then the last line number will be answered.
  96          *
  97          * @param charPosition the given character position
  98          * @return a 1-based line number using the lines which have been encountered so far
  99          */
 100         int getLineNumber(int charPosition);
 101
 102         /**
 103          * Reads the next token in the source, and answers its ID as specified by <code>ITerminalSymbols</code>.
 104          * Note that the actual token ID values are subject to change if new keywords are added to the language (e.g. the 'assert' keyword in 1.4).
 105          *
 106          * @return the ID of the token read, as specified by <code>ITerminalSymbols</code>
 107          * @throws InvalidInputException - in case a lexical error was detected while reading the current token
 108          */
 109         int getNextToken() throws InvalidInputException;
 110
 111         /**
 112          * Answers the original source being processed (not a copy of it).
 113          *
 114          * @return the original source being processed
 115          */
 116         char[] getSource();
 117
 118         /**
 119          * Repositions the scanner on some portion of the original source. Once reaching the given <code>endPosition</code>
 120          * it will answer EOF tokens (<code>ITerminalSymbols.TokenNameEOF</code>).
 121          *
 122          * @param startPosition the given start position
 123          * @param endPosition the given end position
 124          */
 125         void resetTo(int startPosition, int endPosition);
 126
 127         /**
 128          * Sets the scanner source to process. By default, the scanner will consider starting at the beginning of the
 129          * source until it reaches its end.
 130          *
 131          * @param source the given source
 132          */
 133         void setSource(char[] source);
 134 }