package net.sourceforge.phpeclipse.phpeditor; import java.util.HashMap; import net.sourceforge.phpeclipse.phpeditor.php.PHPKeywords; /********************************************************************** Copyright (c) 2000, 2002 IBM Corp. and others. All rights reserved. This program and the accompanying materials are made available under the terms of the Common Public License v1.0 which accompanies this distribution, and is available at http://www.eclipse.org/legal/cpl-v10.html Contributors: IBM Corporation - Initial implementation Klaus Hartlage - www.eclipseproject.de **********************************************************************/ public class PHPParser extends PHPKeywords { private static HashMap keywordMap = null; private String str; // current character char ch; // current token int token; // row counter for syntax errors: int rowCount; // column counter for syntax errors: int columnCount; int chIndx; // current identifier String identifier; Long longNumber; Double doubleNumber; final static int TT_EOF = 0; final static int TT_UNDEFINED = 1; final static int TT_NOT = 31; final static int TT_DOT = 32; final static int TT_POW = 33; final static int TT_DIVIDE = 34; final static int TT_MULTIPLY = 35; final static int TT_SUBTRACT = 36; final static int TT_ADD = 37; final static int TT_EQUAL = 38; final static int TT_UNEQUAL = 39; final static int TT_GREATER = 40; final static int TT_GREATEREQUAL = 41; final static int TT_LESS = 42; final static int TT_LESSEQUAL = 43; final static int TT_AND = 44; final static int TT_OR = 45; final static int TT_HASH = 46; final static int TT_DDOT = 47; final static int TT_DOTASSIGN = 48; final static int TT_SET = 49; final static int TT_REF = 50; final static int TT_FOREACH = 51; final static int TT_AMPERSAND = 52; final static int TT_DOLLARLISTOPEN = 53; final static int TT_ARGOPEN = 128; final static int TT_ARGCLOSE = 129; final static int TT_LISTOPEN = 130; final static int TT_LISTCLOSE = 131; final static int TT_PARTOPEN = 132; final static int TT_PARTCLOSE = 133; final static int TT_COMMA = 134; final static int TT_PERCENT = 135; final static int TT_STRING = 136; final static int TT_IDENTIFIER = 138; final static int TT_DIGIT = 139; final static int TT_SEMICOLON = 140; final static int TT_SLOT = 141; final static int TT_SLOTSEQUENCE = 142; final static int TT_DECREMENT = 144; final static int TT_INCREMENT = 145; final static int TT_ADDTO = 146; final static int TT_DIVIDEBY = 147; final static int TT_SUBTRACTFROM = 148; final static int TT_TIMESBY = 149; final static int TT_VARIABLE = 150; final static int TT_INT_NUMBER = 151; final static int TT_DOUBLE_NUMBER = 152; final static int TT_INTERPOLATED_STRING = 153; final static int TT_STRING_CONSTANT = 154; // final static int TT_AT = 153; // @ /** * Class Constructor. * *@param s *@param sess Description of Parameter *@see */ public PHPParser() { if (keywordMap == null) { keywordMap = new HashMap(); for (int i = 0; i < PHP_KEYWORS.length; i++) { keywordMap.put(PHP_KEYWORS[i], new Integer(PHP_KEYWORD_TOKEN[i])); } } this.str = ""; this.token = TT_EOF; this.chIndx = 0; this.rowCount = 1; this.columnCount = 0; getNextToken(); } private void throwSyntaxError(String error) { if (str.length() < chIndx) { chIndx--; } // read until end-of-line int eol = chIndx; while (str.length() > eol) { ch = str.charAt(eol++); if (ch == '\n') { eol--; break; } } throw new SyntaxError(rowCount, chIndx - columnCount + 1, str.substring(columnCount, eol), error); } /** * Method Declaration. * *@see */ void getChar() { if (str.length() > chIndx) { ch = str.charAt(chIndx++); return; } chIndx = str.length() + 1; ch = ' '; token = TT_EOF; } /** * gets the next token from input */ void getNextToken() { while (str.length() > chIndx) { ch = str.charAt(chIndx++); token = TT_UNDEFINED; if (ch == '\n') { rowCount++; columnCount = chIndx; continue; // while loop } if (!Character.isWhitespace(ch)) { if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || (ch == '_') || (ch == '$') || (ch == '@')) { getIdentifier(); return; } if (ch >= '0' && ch <= '9') { getNumber(); return; } if (ch == '/') { if (str.length() > chIndx) { if (str.charAt(chIndx) == '/') { chIndx++; // read comment until end of line: while ((str.length() > chIndx) && (str.charAt(chIndx) != '\n')) { chIndx++; } continue; } else if (str.charAt(chIndx) == '*') { chIndx++; // multi line comment: while (str.length() > chIndx) { if (str.charAt(chIndx) == '*' && (str.length() > (chIndx + 1)) && str.charAt(chIndx + 1) == '/') { chIndx += 2; break; } chIndx++; } continue; } } } else if (ch == '#') { // read comment until end of line: while ((str.length() > chIndx) && (str.charAt(chIndx) != '\n')) { chIndx++; } continue; } else if (ch == '"') { // read string until end while ((str.length() > chIndx) && (str.charAt(chIndx++) != '"')) { if (str.charAt(chIndx) == '\\') { if (str.length() > chIndx) { chIndx++; } if (str.length() > chIndx) { chIndx++; } } else { if (str.charAt(chIndx) == '\n') { rowCount++; columnCount = chIndx; } } } // if (str.length() > chIndx) { // chIndx++; // } token = TT_INTERPOLATED_STRING; return; } else if (ch == '\'') { // read string until end while ((str.length() > chIndx) && (str.charAt(chIndx++) != '\'')) { if (str.charAt(chIndx) == '\\') { if (str.length() > chIndx) { chIndx++; } if (str.length() > chIndx) { chIndx++; } } } // if (str.length() > chIndx) { // chIndx++; // } token = TT_STRING_CONSTANT; return; } switch (ch) { case '(' : token = TT_ARGOPEN; break; case ')' : token = TT_ARGCLOSE; break; case '{' : token = TT_LISTOPEN; break; case '}' : token = TT_LISTCLOSE; break; case '[' : token = TT_PARTOPEN; break; case ']' : token = TT_PARTCLOSE; break; case ',' : token = TT_COMMA; break; case '.' : token = TT_DOT; if (str.length() > chIndx) { if (str.charAt(chIndx) == '=') { chIndx++; token = TT_DOTASSIGN; break; } } break; case '"' : token = TT_STRING; break; case '%' : token = TT_PERCENT; break; case ';' : token = TT_SEMICOLON; break; case '^' : token = TT_POW; break; case '/' : token = TT_DIVIDE; if (str.length() > chIndx) { if (str.charAt(chIndx) == '=') { chIndx++; token = TT_DIVIDEBY; break; } } break; case '*' : token = TT_MULTIPLY; if (str.length() > chIndx) { if (str.charAt(chIndx) == '*') { chIndx++; token = TT_POW; break; } if (str.charAt(chIndx) == '=') { chIndx++; token = TT_TIMESBY; break; } } break; case '+' : token = TT_ADD; if (str.length() > chIndx) { if (str.charAt(chIndx) == '+') { chIndx++; token = TT_INCREMENT; break; } if (str.charAt(chIndx) == '=') { chIndx++; token = TT_ADDTO; break; } } break; case '-' : token = TT_SUBTRACT; if (str.length() > chIndx) { if (str.charAt(chIndx) == '-') { chIndx++; token = TT_DECREMENT; break; } if (str.charAt(chIndx) == '=') { chIndx++; token = TT_SUBTRACTFROM; break; } if (str.charAt(chIndx) == '>') { chIndx++; token = TT_REF; break; } } break; case '=' : token = TT_SET; if (str.length() > chIndx) { ch = str.charAt(chIndx); if (ch == '=') { chIndx++; token = TT_EQUAL; break; } if (ch == '>') { chIndx++; token = TT_FOREACH; break; } } break; case '!' : token = TT_NOT; if (str.length() > chIndx) { if (str.charAt(chIndx) == '=') { chIndx++; token = TT_UNEQUAL; break; } } break; case '>' : token = TT_GREATER; if (str.length() > chIndx) { if (str.charAt(chIndx) == '=') { chIndx++; token = TT_GREATEREQUAL; break; } } break; case '<' : token = TT_LESS; if (str.length() > chIndx) { if (str.charAt(chIndx) == '=') { chIndx++; token = TT_LESSEQUAL; break; } } break; case '|' : if (str.length() > chIndx) { if (str.charAt(chIndx) == '|') { chIndx++; token = TT_OR; break; } } break; case '&' : if (str.length() > chIndx) { if (str.charAt(chIndx) == '&') { chIndx++; token = TT_AND; break; } else { token = TT_AMPERSAND; break; } } break; case ':' : token = TT_DDOT; break; case '#' : token = TT_HASH; break; // case '@' : // token = TT_AT; // // break; default : throwSyntaxError("unexpected character: '" + ch + "'"); } if (token == TT_UNDEFINED) { throwSyntaxError("token not found"); } return; } } chIndx = str.length() + 1; ch = ' '; token = TT_EOF; } void getIdentifier() { StringBuffer ident = new StringBuffer(); ident.append(ch); if (ch == '$') { token = TT_VARIABLE; } else { token = TT_IDENTIFIER; } getChar(); while ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || (ch >= '0' && ch <= '9') || (ch >= '_')) { ident.append(ch); getChar(); } identifier = ident.toString(); chIndx--; Integer i = (Integer) keywordMap.get(identifier.toLowerCase()); if (i != null) { token = i.intValue(); } } void getNumber() { StringBuffer inum = new StringBuffer(); char dFlag = ' '; int numFormat = 10; // save first digit char firstCh = ch; inum.append(ch); getChar(); // determine number conversions: if (firstCh == '0') { switch (ch) { case 'b' : numFormat = 2; getChar(); break; case 'B' : numFormat = 2; getChar(); break; case 'o' : numFormat = 8; getChar(); break; case 'O' : numFormat = 8; getChar(); break; case 'x' : numFormat = 16; getChar(); break; case 'X' : numFormat = 16; getChar(); break; } } if (numFormat == 16) { while ((ch >= '0' && ch <= '9') || (ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F')) { inum.append(ch); getChar(); } } else { while ((ch >= '0' && ch <= '9') || (ch == '.') || (ch == 'E') || (ch == 'e')) { if ((ch == '.') || (ch == 'E') || (ch == 'e')) { if (ch == '.' && dFlag != ' ') { break; } if ((dFlag == 'E') || (dFlag == 'e')) { break; } dFlag = ch; inum.append(ch); getChar(); if ((ch == '-') || (ch == '+')) { inum.append(ch); getChar(); } } else { inum.append(ch); getChar(); } } } chIndx--; try { if (dFlag != ' ') { doubleNumber = new Double(inum.toString()); token = TT_DOUBLE_NUMBER; return; } else { longNumber = Long.valueOf(inum.toString(), numFormat); token = TT_INT_NUMBER; return; } } catch (Throwable e) { throwSyntaxError("Number format error: " + inum.toString()); } } public void start(String s, int rowCount) throws SyntaxError { // start up this.str = s; this.token = TT_EOF; this.chIndx = 0; this.rowCount = rowCount; this.columnCount = 0; getNextToken(); statementList(); if (token != TT_EOF) { if (token == TT_ARGCLOSE) { throwSyntaxError("too many closing ')'; end-of-file not reached"); } if (token == TT_LISTCLOSE) { throwSyntaxError("too many closing '}'; end-of-file not reached"); } if (token == TT_PARTCLOSE) { throwSyntaxError("too many closing ']'; end-of-file not reached"); } if (token == TT_ARGOPEN) { throwSyntaxError("read character '('; end-of-file not reached"); } if (token == TT_LISTOPEN) { throwSyntaxError("read character '{'; end-of-file not reached"); } if (token == TT_PARTOPEN) { throwSyntaxError("read character '['; end-of-file not reached"); } throwSyntaxError("end-of-file not reached"); } } public void statementList() { do { statement(); if ((token == TT_LISTCLOSE) || (token == TT_elseif) || (token == TT_endif) || (token == TT_endfor) || (token == TT_endforeach) || (token == TT_endwhile) || (token == TT_endswitch) || (token == TT_EOF)) { return; } } while (true); } public void statement() { while (token != TT_UNDEFINED && token != TT_EOF) { if (token > TT_KEYWORD) { if (token == TT_case) { getNextToken(); constant(); if (token == TT_DDOT) { getNextToken(); statement(); } else { throwSyntaxError("':' character after 'case' constant expected."); } return; } else if (token == TT_default) { getNextToken(); if (token == TT_DDOT) { getNextToken(); statement(); } else { throwSyntaxError("':' character after 'default' expected."); } return; } else if (token == TT_include || token == TT_include_once) { getNextToken(); expression(); if (token == TT_SEMICOLON) { getNextToken(); } else { throwSyntaxError("';' character after 'include' or 'include_once' expected."); } return; } else if (token == TT_require || token == TT_require_once) { getNextToken(); //constant(); expression(); if (token == TT_SEMICOLON) { getNextToken(); } else { throwSyntaxError("';' character after 'require' or 'require_once' expected."); } return; } else if (token == TT_if) { getNextToken(); if (token == TT_ARGOPEN) { getNextToken(); } else { throwSyntaxError("'(' expected after 'if' keyword."); } expression(); if (token == TT_ARGCLOSE) { getNextToken(); } else { throwSyntaxError("')' expected after 'if' condition."); } ifStatement(); return; } else if (token == TT_switch) { getNextToken(); if (token == TT_ARGOPEN) { getNextToken(); } else { throwSyntaxError("'(' expected after 'switch' keyword."); } expression(); if (token == TT_ARGCLOSE) { getNextToken(); } else { throwSyntaxError("')' expected after 'switch' condition."); } switchStatement(); return; } else if (token == TT_for) { getNextToken(); if (token == TT_ARGOPEN) { getNextToken(); } else { throwSyntaxError("'(' expected after 'for' keyword."); } if (token == TT_SEMICOLON) { getNextToken(); } else { expression(); if (token == TT_SEMICOLON) { getNextToken(); } else { throwSyntaxError("';' character after 'for' expected."); } } if (token == TT_SEMICOLON) { getNextToken(); } else { expression(); if (token == TT_SEMICOLON) { getNextToken(); } else { throwSyntaxError("';' character after 'for' expected."); } } if (token == TT_ARGCLOSE) { getNextToken(); } else { expression(); if (token == TT_ARGCLOSE) { getNextToken(); } else { throwSyntaxError("')' expected after 'for' condition."); } } forStatement(); return; } else if (token == TT_while) { getNextToken(); if (token == TT_ARGOPEN) { getNextToken(); } else { throwSyntaxError("'(' expected after 'while' keyword."); } expression(); if (token == TT_ARGCLOSE) { getNextToken(); } else { throwSyntaxError("')' expected after 'while' condition."); } whileStatement(); return; } else if (token == TT_foreach) { getNextToken(); if (token == TT_ARGOPEN) { getNextToken(); } else { throwSyntaxError("'(' expected after 'foreach' keyword."); } expression(); if (token == TT_as) { getNextToken(); } else { throwSyntaxError("'as' expected after 'foreach' exxpression."); } variable(); if (token == TT_FOREACH) { getNextToken(); variable(); } if (token == TT_ARGCLOSE) { getNextToken(); } else { throwSyntaxError("')' expected after 'foreach' expression."); } foreachStatement(); return; } else if (token == TT_continue || token == TT_break || token == TT_return) { getNextToken(); if (token != TT_SEMICOLON) { expression(); } if (token == TT_SEMICOLON) { getNextToken(); } else { throwSyntaxError("';' expected after 'continue', 'break' or 'return'."); } return; } else if (token == TT_echo) { getNextToken(); expressionList(); if (token == TT_SEMICOLON) { getNextToken(); } else { throwSyntaxError("';' expected after 'echo' statement."); } return; } else if (token == TT_print) { getNextToken(); expression(); if (token == TT_SEMICOLON) { getNextToken(); } else { throwSyntaxError("';' expected after 'print' statement."); } return; } else if (token == TT_global || token == TT_static) { getNextToken(); variableList(); if (token == TT_SEMICOLON) { getNextToken(); } else { throwSyntaxError("';' expected after 'global' or 'static' statement."); } return; } else if (token == TT_unset) { getNextToken(); if (token == TT_ARGOPEN) { getNextToken(); } else { throwSyntaxError("'(' expected after 'unset' keyword."); } variableList(); if (token == TT_ARGCLOSE) { getNextToken(); } else { throwSyntaxError("')' expected after 'unset' statement."); } if (token == TT_SEMICOLON) { getNextToken(); } else { throwSyntaxError("';' expected after 'unset' statement."); } return; } else if (token == TT_exit || token == TT_die) { getNextToken(); if (token != TT_SEMICOLON) { exitStatus(); } if (token == TT_SEMICOLON) { getNextToken(); } else { throwSyntaxError("';' expected after 'exit' or 'die' statement."); } return; } else if (token == TT_define) { getNextToken(); if (token == TT_ARGOPEN) { getNextToken(); } else { throwSyntaxError("'(' expected after 'define' keyword."); } constant(); if (token == TT_COMMA) { getNextToken(); } else { throwSyntaxError("',' expected after first 'define' constant."); } constant(); if (token == TT_ARGCLOSE) { getNextToken(); } else { throwSyntaxError("')' expected after 'define' statement."); } if (token == TT_SEMICOLON) { getNextToken(); } else { throwSyntaxError("';' expected after 'define' statement."); } return; } } else if (token == TT_LISTOPEN) { // compundStatement getNextToken(); if (token != TT_LISTCLOSE) { statementList(); } if (token == TT_LISTCLOSE) { getNextToken(); } else { throwSyntaxError("'}' expected."); } } else { if (token != TT_SEMICOLON) { expression(); } if (token == TT_SEMICOLON) { getNextToken(); } else { throwSyntaxError("';' expected after expression."); } } } } public void labeledStatement() { } public void expressionStatement() { } public void inclusionStatement() { } // public void compoundStatement() { // } public void selectionStatement() { } public void iterationStatement() { } public void jumpStatement() { } public void outputStatement() { } public void scopeStatement() { } public void flowStatement() { } public void definitionStatement() { } public void ifStatement() { // statement [else-statement] statement(); if (token == TT_else) { getNextToken(); statement(); } } public void switchStatement() { } public void forStatement() { } public void whileStatement() { } public void foreachStatement() { } public void exitStatus() { if (token == TT_ARGOPEN) { getNextToken(); } else { throwSyntaxError("'(' expected in 'exit-status'."); } if (token != TT_ARGCLOSE) { expression(); } if (token == TT_ARGCLOSE) { getNextToken(); } else { throwSyntaxError("')' expected after 'exit-status'."); } } public void expressionList() { do { expression(); if (token == TT_COMMA) { getNextToken(); } else { break; } } while (true); } public void expression() { if (token == TT_STRING_CONSTANT || token == TT_INTERPOLATED_STRING) { getNextToken(); } else { postfixExpression(); // while (token != TT_SEMICOLON) { // getNextToken(); // } } } public void postfixExpression() { switch (token) { case TT_ARGOPEN : getNextToken(); expression(); if (token != TT_ARGCLOSE) { throwSyntaxError(") expected in postfix-expression."); } getNextToken(); break; case TT_DOUBLE_NUMBER : getNextToken(); break; case TT_INT_NUMBER : getNextToken(); break; case TT_VARIABLE : getNextToken(); break; case TT_IDENTIFIER : getNextToken(); if (token == TT_ARGOPEN) { getNextToken(); if (token != TT_ARGCLOSE) { expressionList(); if (token != TT_ARGCLOSE) { throwSyntaxError(") expected after identifier in postfix-expression."); } } getNextToken(); } break; } boolean while_flag = true; do { switch (token) { case TT_PARTOPEN : getNextToken(); expression(); if (token != TT_PARTCLOSE) { throwSyntaxError("] expected in postfix-expression."); } getNextToken(); break; case TT_REF : switch (token) { case TT_VARIABLE : getNextToken(); break; case TT_IDENTIFIER : getNextToken(); break; case TT_LISTOPEN : getNextToken(); expression(); if (token != TT_LISTCLOSE) { throwSyntaxError("] expected in postfix-expression."); } getNextToken(); break; default : throwSyntaxError("Syntax error after '->' token."); } case TT_INCREMENT : getNextToken(); break; case TT_DECREMENT : getNextToken(); break; default : while_flag = false; } } while (while_flag); } public void variableList() { do { variable(); if (token == TT_COMMA) { getNextToken(); } else { break; } } while (true); } public void variable() { if (token == TT_VARIABLE) { getNextToken(); } else { throwSyntaxError("$-variable expected in variable-list."); } } public void constant() { } }