X-Git-Url: http://secure.phpeclipse.com
diff --git a/archive/org.plog4u.wiki/src/org/plog4u/wiki/filter/WikipediaParser.java b/archive/org.plog4u.wiki/src/org/plog4u/wiki/filter/WikipediaParser.java
new file mode 100644
index 0000000..5383fb7
--- /dev/null
+++ b/archive/org.plog4u.wiki/src/org/plog4u/wiki/filter/WikipediaParser.java
@@ -0,0 +1,2739 @@
+package org.plog4u.wiki.filter;
+
+import java.io.IOException;
+import java.io.Writer;
+import java.util.ArrayList;
+import java.util.NoSuchElementException;
+import java.util.Stack;
+import java.util.StringTokenizer;
+
+//import org.apache.commons.logging.Log;
+//import org.apache.commons.logging.LogFactory;
+import org.plog4u.wiki.filter.WikipediaFilter.InvalidInputException;
+import org.plog4u.wiki.filter.tags.AbstractTag;
+import org.plog4u.wiki.filter.tags.CloseTagToken;
+import org.plog4u.wiki.filter.tags.ListToken;
+import org.plog4u.wiki.filter.tags.OpenTagToken;
+import org.plog4u.wiki.filter.tags.SpecialTagToken;
+import org.radeox.api.engine.ImageRenderEngine;
+import org.radeox.api.engine.IncludeRenderEngine;
+import org.radeox.api.engine.RenderEngine;
+import org.radeox.api.engine.WikiRenderEngine;
+import org.radeox.filter.context.FilterContext;
+import org.radeox.filter.interwiki.InterWiki;
+import org.radeox.macro.Macro;
+import org.radeox.macro.MacroRepository;
+import org.radeox.macro.parameter.MacroParameter;
+import org.radeox.util.Encoder;
+import org.radeox.util.StringBufferWriter;
+
+/**
+ * A parser for the WikipediaFilter
+ *
+ * @see org.plog4u.wiki.filter.WikipediaFilter
+ */
+public class WikipediaParser {
+ // private static Log log = LogFactory.getLog(WikipediaFilter.class);
+
+ MacroRepository fMacros;
+
+ private FilterContext fContext;
+
+ private RenderEngine fWikiEngine;
+
+ // TODO check, if this counter is correct in recursions:
+ private int fImageCounter;
+
+ /**
+ * The current snip
+ */
+ // private Snip fSnip;
+ /**
+ * If the snip contains headings for a "table of content" this buffer temporarily contains the start of the snip and the
+ * "table of content"
+ */
+ private StringBuffer fResultBufferHeader = null;
+
+ /**
+ * The buffer for the resulting HTML rendering from the current snip.
+ */
+ private StringBuffer fResultBuffer;
+
+ /**
+ * The wiki syntax string which should be parsed
+ */
+ private char[] fSource;
+
+ /**
+ * The corresponding String for the character source array
+ */
+ private final String fStringSource;
+
+ /**
+ * The current scanned character
+ */
+ private char fCurrentCharacter;
+
+ /**
+ * The current offset in the character source array
+ */
+ private int fCurrentPosition;
+
+ /**
+ * The current recursion level for this parser
+ */
+ private int fRecursionLevel;
+
+ private Stack fTokenStack;
+
+ // private Stack fTableStack;
+
+ private boolean fWhiteStart = false;
+
+ private int fWhiteStartPosition = 0;
+
+ // private TeXParser fTeXParser;
+ // private TeXParser fTeXImageParser;
+ /**
+  * The list used for the "table of content" (initially <code>null</code>).
+  */
+ private ArrayList fTableOfContent = null;
+
+ // private String fSrcPath;
+ // private String fBinPath;
+
+ public WikipediaParser(MacroRepository macros, String stringSource, StringBuffer result, FilterContext context, int recursionLevel) {
+ fContext = context;
+ fWikiEngine = context.getRenderContext().getRenderEngine();
+
+ // try {
+ // SnipMacroParameter params = (SnipMacroParameter)
+ // fContext.getMacroParameter();
+ // fSnip = params.getSnipRenderContext().getSnip();
+ // } catch (ClassCastException e) {
+ // e.printStackTrace();
+ // }
+ fMacros = macros;
+ fResultBuffer = result;
+ fStringSource = stringSource;
+ setSource(stringSource.toCharArray());
+ fRecursionLevel = recursionLevel;
+ fTokenStack = new Stack();
+ // fTableStack = new Stack();
+ // fTeXParser = new TeXParser("", "m:");
+ // fTeXImageParser = new TeXParser("", "");
+ fImageCounter = 1;
+
+ // fSrcPath = (String) fContext.getRenderContext().get("srcpath");
+ // if (fSrcPath==null) {
+ // fSrcPath = "";
+ // }
+ // fBinPath = (String) fContext.getRenderContext().get("binpath");
+ // if (fBinPath==null) {
+ // fBinPath = "";
+ // }
+ }
+
+ /**
+ * Check until a new-line was found, if there are only whitespace characters before the given endposition.
+ *
+ * @param startPosition
+ * @param endPosition
+ * @return -1 if no whitespace line is found from the end (i.e. endPosition); otherwise the offset directly after where the
+ * new-line was found
+ */
+ private int checkWhitespaces(int startPosition, int endPosition) {
+ char tempChar;
+ while (endPosition >= startPosition) {
+ if ((tempChar = fSource[endPosition--]) == '\n') {
+ return endPosition + 2;
+ }
+ if (tempChar != ' ' && tempChar != '\t' && tempChar != '\r') {
+ return -1;
+ }
+ }
+ if (endPosition < startPosition && endPosition >= 0) {
+ if ((tempChar = fSource[endPosition]) != '\n') {
+ return -1;
+ }
+ } else if (endPosition == (-1) && startPosition == 0) {
+ // special case at the start of a string
+ return 0;
+ }
+ return startPosition;
+ }
+
+ /**
+  * Copy the pending plain text of the source array into the result buffer and replace the special HTML characters (less-than,
+  * greater-than, ampersand, apostrophe and double quote) by numeric character references.
+  *
+  * The copied range starts at <code>whiteStartPosition</code> and ends before <code>fCurrentPosition - diff</code>. Nothing
+  * happens if <code>whiteStart</code> is <code>false</code>; otherwise <code>fWhiteStart</code> is reset afterwards.
+  *
+  * @param whiteStart
+  *          <code>true</code> if there is pending text to copy
+  * @param whiteStartPosition
+  *          the offset of the first character to copy
+  * @param diff
+  *          the number of characters in front of the current position which must not be copied
+  */
+ private void copyWhite(boolean whiteStart, final int whiteStartPosition, final int diff) {
+   if (!whiteStart) {
+     return;
+   }
+   final int len = fCurrentPosition - diff;
+   int lastIndex = whiteStartPosition;
+   for (int currentIndex = whiteStartPosition; currentIndex < len; currentIndex++) {
+     final String entity;
+     switch (fSource[currentIndex]) {
+     case '<':
+       entity = "&#60;";
+       break;
+     case '>':
+       entity = "&#62;";
+       break;
+     case '&':
+       entity = "&#38;";
+       break;
+     case '\'':
+       entity = "&#39;";
+       break;
+     case '\"':
+       entity = "&#34;";
+       break;
+     default:
+       entity = null;
+       break;
+     }
+     if (entity != null) {
+       // flush the unescaped run in front of the special character, then emit the reference instead of the character
+       if (lastIndex < currentIndex) {
+         fResultBuffer.append(fSource, lastIndex, currentIndex - lastIndex);
+       }
+       fResultBuffer.append(entity);
+       lastIndex = currentIndex + 1;
+     }
+   }
+   if (lastIndex < len) {
+     fResultBuffer.append(fSource, lastIndex, len - lastIndex);
+   }
+   fWhiteStart = false;
+ }
+
+ /**
+  * Copy the text into the result buffer and replace the special HTML characters (less-than, greater-than, ampersand, apostrophe
+  * and double quote) by numeric character references.
+  *
+  * Every special character is handled the same way, so a less-than sign at the start of the text or directly after another
+  * special character is no longer copied a second time in its raw form.
+  *
+  * @param text
+  *          the text to copy; must not be <code>null</code>
+  */
+ private void copyWhite(String text) {
+   final int len = text.length();
+   int lastIndex = 0;
+   for (int currentIndex = 0; currentIndex < len; currentIndex++) {
+     final String entity;
+     switch (text.charAt(currentIndex)) {
+     case '<':
+       entity = "&#60;";
+       break;
+     case '>':
+       entity = "&#62;";
+       break;
+     case '&':
+       entity = "&#38;";
+       break;
+     case '\'':
+       entity = "&#39;";
+       break;
+     case '\"':
+       entity = "&#34;";
+       break;
+     default:
+       entity = null;
+       break;
+     }
+     if (entity != null) {
+       // flush the unescaped run in front of the special character, then emit the reference instead of the character
+       if (lastIndex < currentIndex) {
+         fResultBuffer.append(text.substring(lastIndex, currentIndex));
+       }
+       fResultBuffer.append(entity);
+       lastIndex = currentIndex + 1;
+     }
+   }
+   if (lastIndex < len) {
+     fResultBuffer.append(text.substring(lastIndex, len));
+   }
+ }
+
+ /**
+  * Copy the text into the result buffer and replace the special HTML characters less-than, greater-than, apostrophe and double
+  * quote by numeric character references. Additionally every newline is replaced by a <code>br</code> element.
+  *
+  * The ampersand is copied unchanged: the branch which escaped it is disabled in the original code.
+  *
+  * @param text
+  *          the text to copy; must not be <code>null</code>
+  */
+ private void copyNowikiNewLine(String text) {
+   final int len = text.length();
+   int lastIndex = 0;
+   for (int currentIndex = 0; currentIndex < len; currentIndex++) {
+     final String replacement;
+     switch (text.charAt(currentIndex)) {
+     case '\n':
+       replacement = "<br/>";
+       break;
+     case '<':
+       replacement = "&#60;";
+       break;
+     case '>':
+       replacement = "&#62;";
+       break;
+     case '\'':
+       replacement = "&#39;";
+       break;
+     case '\"':
+       replacement = "&#34;";
+       break;
+     default:
+       // this includes '&', which is intentionally not escaped here
+       replacement = null;
+       break;
+     }
+     if (replacement != null) {
+       // flush the unchanged run in front of the character, then emit the replacement instead of the character
+       if (lastIndex < currentIndex) {
+         fResultBuffer.append(text.substring(lastIndex, currentIndex));
+       }
+       fResultBuffer.append(replacement);
+       lastIndex = currentIndex + 1;
+     }
+   }
+   if (lastIndex < len) {
+     fResultBuffer.append(text.substring(lastIndex, len));
+   }
+ }
+
+ /**
+ * Render the HTML token which are defined in the OPEN_TAGS and CLOSE_TAGS map
+ *
+ * @return
+ */
+ public int getHTMLToken() {
+ int currentHtmlPosition = fCurrentPosition;
+ try {
+ char closeCharacter;
+ char nextCharacter;
+ if (getNextChar('/')) {
+ currentHtmlPosition++;
+ // closing tag
+ int r = readUntilCharOrEOL('>');
+ if (r != 1) {
+ return WikipediaFilter.TokenNotFound;
+ }
+ String closeTagString = new String(fSource, currentHtmlPosition, fCurrentPosition - currentHtmlPosition - 1).toLowerCase();
+ // System.out.println(closeTagString);
+ StringTokenizer tagTokenizer = new StringTokenizer(closeTagString);
+ String tokenString;
+ try {
+ tokenString = tagTokenizer.nextToken();
+ CloseTagToken token = (CloseTagToken) WikipediaFilter.CLOSE_TAGS.get(tokenString);
+ if (token == null) {
+ return WikipediaFilter.TokenNotFound;
+ }
+ Object topToken = fTokenStack.peek();
+ if (topToken instanceof OpenTagToken && ((OpenTagToken) topToken).getTagName() == token.getTagName()) {
+ fTokenStack.pop();
+ // if (token.getTagName().equals("table")) {
+ // fTableStack.pop();
+ // }
+ copyWhite(fWhiteStart, fWhiteStartPosition, 3 + tokenString.length());
+ fWhiteStart = false;
+ fResultBuffer.append(token.getCloseTag());
+ return WikipediaFilter.TokenIgnore;
+ }
+ fWhiteStart = false;
+ unexpectedTag(token.getTagName());
+ return WikipediaFilter.TokenIgnore;
+ } catch (NoSuchElementException e) {
+ return WikipediaFilter.TokenNotFound;
+ }
+
+ } else {
+ // opening tag
+ String tokenString;
+ int tagNameStart = fCurrentPosition;
+ int tokenLength = 0;
+ while (Character.isJavaIdentifierStart(fSource[fCurrentPosition++])) {
+ tokenLength++;
+ }
+ try {
+ tokenString = new String(fSource, tagNameStart, fCurrentPosition - tagNameStart - 1); //tagTokenizer.nextToken();
+ OpenTagToken token = (OpenTagToken) WikipediaFilter.OPEN_TAGS.get(tokenString);
+ if (token == null) {
+ return WikipediaFilter.TokenNotFound;
+ }
+ copyWhite(fWhiteStart, fWhiteStartPosition, (fCurrentPosition - tagNameStart) + 1);
+ fWhiteStart = false;
+
+ if (token instanceof SpecialTagToken) {
+ fResultBuffer.append(token.getOpenTag());
+ while (Character.isWhitespace(fSource[fCurrentPosition])) {
+ fCurrentPosition++;
+ }
+ if (fSource[fCurrentPosition] == '/') {
+ fCurrentPosition++;
+ }
+ if (fSource[fCurrentPosition] == '>') {
+ fCurrentPosition++;
+ }
+ } else if (token instanceof OpenTagToken) {
+ fResultBuffer.append("<");
+ fResultBuffer.append(token.getTagName());
+ fTokenStack.push(token);
+ fCurrentPosition = token.scan(fResultBuffer, fSource, fCurrentPosition - 1);
+ fResultBuffer.append(">");
+ }
+
+ // System.out.println(fResultBuffer);
+ return WikipediaFilter.TokenIgnore;
+ } catch (NoSuchElementException e) {
+ return WikipediaFilter.TokenNotFound;
+ }
+ }
+
+ } catch (IndexOutOfBoundsException e) {
+ //
+ }
+ fCurrentPosition = currentHtmlPosition;
+ return WikipediaFilter.TokenNotFound;
+ }
+
+ /**
+  * Consume the character at the current position if it equals the tested character.
+  *
+  * The character at the current position is stored in <code>fCurrentCharacter</code> even if it does not match; the current
+  * position is advanced only on a match.
+  *
+  * @param testedChar
+  *          the expected character
+  * @return <code>true</code> if the character matched and was consumed; <code>false</code> otherwise or if the current position
+  *         is outside of the source
+  */
+ public final boolean getNextChar(char testedChar) {
+   if (fCurrentPosition < 0 || fCurrentPosition >= fSource.length) {
+     return false;
+   }
+   fCurrentCharacter = fSource[fCurrentPosition];
+   final boolean matched = fCurrentCharacter == testedChar;
+   if (matched) {
+     fCurrentPosition++;
+   }
+   return matched;
+ }
+
+ public final int getNextChar(char testedChar1, char testedChar2) {
+ int temp = fCurrentPosition;
+ try {
+ int result;
+ fCurrentCharacter = fSource[fCurrentPosition++];
+ if (fCurrentCharacter == testedChar1)
+ result = 0;
+ else if (fCurrentCharacter == testedChar2)
+ result = 1;
+ else {
+ fCurrentPosition = temp;
+ return -1;
+ }
+ return result;
+ } catch (IndexOutOfBoundsException e) {
+ fCurrentPosition = temp;
+ return -1;
+ }
+ }
+
+ /**
+  * Consume the character at the current position if it is a digit according to <code>Character.isDigit()</code>.
+  *
+  * The character at the current position is stored in <code>fCurrentCharacter</code> even if it is no digit; the current
+  * position is advanced only for a digit.
+  *
+  * @return <code>true</code> if a digit was consumed; <code>false</code> otherwise or if the current position is outside of the
+  *         source
+  */
+ public final boolean getNextCharAsDigit() {
+   if (fCurrentPosition < 0 || fCurrentPosition >= fSource.length) {
+     return false;
+   }
+   fCurrentCharacter = fSource[fCurrentPosition];
+   if (Character.isDigit(fCurrentCharacter)) {
+     fCurrentPosition++;
+     return true;
+   }
+   return false;
+ }
+
+ /**
+  * Consume the character at the current position if it is a digit in the given radix according to
+  * <code>Character.digit()</code>.
+  *
+  * The character at the current position is stored in <code>fCurrentCharacter</code> even if it is no digit; the current
+  * position is advanced only for a digit.
+  *
+  * @param radix
+  *          the radix which is passed to <code>Character.digit()</code>
+  * @return <code>true</code> if a digit was consumed; <code>false</code> otherwise or if the current position is outside of the
+  *         source
+  */
+ public final boolean getNextCharAsDigit(int radix) {
+   if (fCurrentPosition < 0 || fCurrentPosition >= fSource.length) {
+     return false;
+   }
+   fCurrentCharacter = fSource[fCurrentPosition];
+   final boolean isDigit = Character.digit(fCurrentCharacter, radix) != -1;
+   if (isDigit) {
+     fCurrentPosition++;
+   }
+   return isDigit;
+ }
+
+ /**
+  * Count and consume the consecutive occurrences of the tested character which start at the current position.
+  *
+  * Afterwards the current position is at the first character which differs from the tested character, or unchanged if it was
+  * outside of the source. Every inspected character is stored in <code>fCurrentCharacter</code>.
+  *
+  * @param testedChar
+  *          the character to count
+  * @return the number of consumed characters
+  */
+ public final int getNumberOfChar(char testedChar) {
+   int count = 0;
+   while (fCurrentPosition >= 0 && fCurrentPosition < fSource.length) {
+     fCurrentCharacter = fSource[fCurrentPosition];
+     if (fCurrentCharacter != testedChar) {
+       break;
+     }
+     fCurrentPosition++;
+     count++;
+   }
+   return count;
+ }
+
+ /**
+  * Consume all '*' and '#' characters which start at the current position and return them together with the character directly
+  * in front of the current position (which is copied without being checked).
+  *
+  * Afterwards the current position is at the first character which is neither '*' nor '#'.
+  *
+  * @return a new array with the character in front of the current position followed by the consumed characters
+  */
+ public final char[] getListChars() {
+   final int startPosition = fCurrentPosition - 1;
+   while (fCurrentPosition >= 0 && fCurrentPosition < fSource.length) {
+     fCurrentCharacter = fSource[fCurrentPosition];
+     final boolean listChar = fCurrentCharacter == '*' || fCurrentCharacter == '#';
+     if (!listChar) {
+       break;
+     }
+     fCurrentPosition++;
+   }
+   final int count = fCurrentPosition - startPosition;
+   final char[] listChars = new char[count];
+   System.arraycopy(fSource, startPosition, listChars, 0, count);
+   return listChars;
+ }
+
+ /**
+  * Consume the character at the current position if <code>WikipediaFilter.isWikiPluginIdentifierPart()</code> accepts it.
+  *
+  * The character at the current position is stored in <code>fCurrentCharacter</code> even if it is rejected; the current
+  * position is advanced only for an accepted character.
+  *
+  * @return <code>true</code> if the character was accepted and consumed; <code>false</code> otherwise or if an
+  *         <code>IndexOutOfBoundsException</code> occurred
+  */
+ public boolean getNextCharAsWikiPluginIdentifierPart() {
+   final int mark = fCurrentPosition;
+   boolean accepted;
+   try {
+     fCurrentCharacter = fSource[fCurrentPosition++];
+     accepted = WikipediaFilter.isWikiPluginIdentifierPart(fCurrentCharacter);
+   } catch (IndexOutOfBoundsException e) {
+     accepted = false;
+   }
+   if (!accepted) {
+     // roll back to the position before the test
+     fCurrentPosition = mark;
+   }
+   return accepted;
+ }
+
+ /**
+  * Pop tokens from the top of the token stack as long as the top token is one of the list start tokens (LIST_UL_START,
+  * LIST_OL_START) or one of the format tokens BOLD, ITALIC, STRONG, EM or STRIKETHROUGH, and append one string to the result
+  * buffer for every popped token. The loop ends at the first other token or when the stack is empty.
+  *
+  * NOTE(review): every appended literal is an empty string in this copy of the file; presumably these were the matching closing
+  * tags and the markup was lost - confirm against the original source.
+  */ private void stopList() {
+ while (!fTokenStack.isEmpty()) {
+ AbstractTag tok = (AbstractTag) fTokenStack.peek();
+ if (tok.equals(WikipediaFilter.LIST_UL_START)) { // the list tokens are compared with equals(), the format tokens below by identity
+ fTokenStack.pop();
+ fResultBuffer.append("");
+ } else if (tok.equals(WikipediaFilter.LIST_OL_START)) {
+ fTokenStack.pop();
+ fResultBuffer.append("");
+ } else if (tok == WikipediaFilter.BOLD) {
+ fTokenStack.pop();
+ fResultBuffer.append("");
+ } else if (tok == WikipediaFilter.ITALIC) {
+ fTokenStack.pop();
+ fResultBuffer.append("");
+ } else if (tok == WikipediaFilter.STRONG) {
+ fTokenStack.pop();
+ fResultBuffer.append("");
+ } else if (tok == WikipediaFilter.EM) {
+ fTokenStack.pop();
+ fResultBuffer.append("");
+ } else if (tok == WikipediaFilter.STRIKETHROUGH) {
+ fTokenStack.pop();
+ fResultBuffer.append("");
+ } else {
+ break; // any other token ends the unwinding and stays on the stack
+ }
+ }
+ }
+
+ protected int getNextToken() throws InvalidInputException {
+ boolean startOfIndent = false;
+ fWhiteStartPosition = 0;
+ fWhiteStart = false;
+ try {
+ while (true) {
+ // fStartPosition = fCurrentPosition;
+ fCurrentCharacter = fSource[fCurrentPosition++];
+
+ // ---------Identify the next token-------------
+ switch (fCurrentCharacter) {
+ case '\n':
+ if (fWhiteStart) {
+ int tempPosition = checkWhitespaces(fWhiteStartPosition, fCurrentPosition - 2);
+ if (tempPosition >= 0) {
+ copyWhite(fWhiteStart, fWhiteStartPosition, fCurrentPosition - (++tempPosition));
+ fWhiteStart = false;
+ stopList();
+ fResultBuffer.append("
");
+ // continue;
+ }
+
+ }
+ int fStartPrePosition = fCurrentPosition;
+ boolean preSection = false;
+ try {
+ while (fSource[fCurrentPosition++] == ' ') {
+ fCurrentCharacter = fSource[fCurrentPosition++];
+ while (fCurrentCharacter != '\n') {
+ if (!Character.isWhitespace(fCurrentCharacter)) {
+ // preformatted section starts here
+ preSection = true;
+ }
+ fCurrentCharacter = fSource[fCurrentPosition++];
+ }
+ }
+ --fCurrentPosition;
+ } catch (IndexOutOfBoundsException e) {
+
+ }
+ if (preSection && fRecursionLevel == 1) {
+ String preString;
+ copyWhite(fWhiteStart, fStartPrePosition, fCurrentPosition - fStartPrePosition);
+ fWhiteStart = true;
+ fResultBuffer.append("");
+ // copyWhite(fWhiteStart, fStartPrePosition, 1);
+ preString = new String(fSource, fStartPrePosition, fCurrentPosition - fStartPrePosition - 1) + '\n';
+ fResultBuffer.append(WikipediaFilter.filterParser(preString, fContext, fMacros, fRecursionLevel));
+ // preString = new String(fSource, fStartPrePosition, fCurrentPosition - fStartPrePosition - 1)+'\n';
+ // int preIndex = 0;
+ // int lastIndex = 0;
+ // while (preIndex>=0) {
+ // preIndex = preString.indexOf('\n', lastIndex);
+ // if (preIndex>=0) {
+ // fResultBuffer.append(WikipediaFilter.filterParser(preString.substring(lastIndex,preIndex), fContext,
+ // fCachedPage, fMacros, fRecursionLevel));
+ // fResultBuffer.append('\n');
+ // lastIndex = ++preIndex;
+ // }
+ // }
+ fResultBuffer.append("
");
+ fWhiteStart = false;
+ continue;
+ } else {
+ fCurrentPosition = fStartPrePosition;
+ }
+ break;
+ case ':':
+ if (isStartOfLine()) {
+ copyWhite(fWhiteStart, fWhiteStartPosition, 1);
+ fWhiteStart = false;
+
+ int levelHeader = getNumberOfChar(':') + 1;
+ int startHeadPosition = fCurrentPosition;
+ if (readUntilEOL()) {
+ String head = new String(fSource, startHeadPosition, fCurrentPosition - startHeadPosition);
+ for (int i = 0; i < levelHeader; i++) {
+ fResultBuffer.append("- ");
+ }
+ fResultBuffer.append(head);
+ for (int i = 0; i < levelHeader; i++) {
+ fResultBuffer.append("
");
+ }
+ continue;
+ }
+
+ continue;
+ }
+ break;
+ case ';':
+ if (isStartOfLine() && getNextChar(' ')) {
+ copyWhite(fWhiteStart, fWhiteStartPosition, 1);
+ fWhiteStart = false;
+
+ int startHeadPosition = fCurrentPosition;
+ if (readUntilEOL()) {
+ // TODO not correct - improve this
+ String head = new String(fSource, startHeadPosition, fCurrentPosition - startHeadPosition);
+ int index = head.indexOf(": ");
+ if (index > 0) {
+ fResultBuffer.append("- ");
+ fResultBuffer.append(head.substring(0,index));
+ fResultBuffer.append("
- ");
+ fResultBuffer.append(head.substring(index+2));
+ fResultBuffer.append("
");
+ } else {
+ fResultBuffer.append("- ");
+ fResultBuffer.append(head);
+ fResultBuffer.append("
");
+ }
+ continue;
+ }
+
+ continue;
+ }
+ break;
+ // case '\\': // special characters follow
+ // copyWhite(fWhiteStart, fWhiteStartPosition, 1);
+ // fWhiteStart = false;
+ // try {
+ // fCurrentCharacter = fSource[fCurrentPosition++];
+ // switch (fCurrentCharacter) {
+ // case '\\': // newline
+ // if ((fCurrentCharacter = fSource[fCurrentPosition++]) == '\\') {
+ // fResultBuffer.append(Encoder
+ // .toEntity(fCurrentCharacter));
+ // break;
+ // } else {
+ // fResultBuffer.append("
");
+ // break;
+ // }
+ // default:
+ // fResultBuffer.append(Encoder
+ // .toEntity(fCurrentCharacter));
+ // }
+ // } catch (IndexOutOfBoundsException e) {
+ //
+ // }
+ // continue;
+ // case '$' : // detect tex math
+ // copyWhite(fWhiteStart, fWhiteStartPosition, 1);
+ // fWhiteStart = false;
+ // startOfIndent = false;
+ // int startMathPosition = fCurrentPosition;
+ // if (getNextChar('$')) {
+ // startMathPosition = fCurrentPosition;
+ // copyWhite(fWhiteStart, fWhiteStartPosition, 2);
+ // fWhiteStart = false;
+ // if (readUntilString("$$")) {
+ // String mathContent = new String(fSource, startMathPosition,
+ // fCurrentPosition - startMathPosition - 2);
+ // if (mathContent != null) {
+ // handleTeXMath(mathContent, false);
+ // continue;
+ // }
+ // }
+ // } else {
+ // if (readUntilChar('$')) {
+ // String mathContent = new String(fSource, startMathPosition,
+ // fCurrentPosition - startMathPosition - 1);
+ // if (mathContent != null) {
+ // handleTeXMath(mathContent, true);
+ // continue;
+ // }
+ // }
+ // }
+ // break;
+ case '{':
+ // detect macros
+ copyWhite(fWhiteStart, fWhiteStartPosition, 1);
+ fWhiteStart = false;
+ // boolean scanBody = true;
+ int startMacroPosition = fCurrentPosition;
+ if (getNextChar('|') && handleWikipediaTable()) { // Wikipedia
+ // table
+ // syntax
+ continue;
+ } else {
+ if (readUntilChar('}')) {
+ String macroStartTag;
+
+ macroStartTag = new String(fSource, startMacroPosition, fCurrentPosition - startMacroPosition - 1);
+ if (macroStartTag != null) {
+ createMacro(startMacroPosition, macroStartTag);
+ continue;
+ }
+ }
+ }
+ break;
+ case '[':
+ int startLinkPosition = fCurrentPosition;
+ if (getNextChar('[')) { // wikipedia link style
+ startLinkPosition = fCurrentPosition;
+ copyWhite(fWhiteStart, fWhiteStartPosition, 2);
+ fWhiteStart = false;
+ if (readUntilString("]]")) {
+ String name = new String(fSource, startLinkPosition, fCurrentPosition - startLinkPosition - 2);
+ // test for suffix string
+ int temp = fCurrentPosition;
+ StringBuffer suffixBuffer = new StringBuffer();
+ try {
+ while (true) {
+ fCurrentCharacter = fSource[fCurrentPosition++];
+ if (!Character.isLetterOrDigit(fCurrentCharacter)) {
+ fCurrentPosition--;
+ break;
+ }
+ suffixBuffer.append(fCurrentCharacter);
+ }
+ handleWikipediaLink(name, suffixBuffer.toString());
+ continue;
+ } catch (IndexOutOfBoundsException e) {
+ fCurrentPosition = temp;
+ }
+
+ handleWikipediaLink(name, "");
+ continue;
+ }
+
+ } else {
+ copyWhite(fWhiteStart, fWhiteStartPosition, 1);
+ fWhiteStart = false;
+
+ if (readUntilChar(']')) {
+ String name = new String(fSource, startLinkPosition, fCurrentPosition - startLinkPosition - 1);
+ handleSnipLink(name);
+ continue;
+ }
+ }
+ break;
+ // case '1': // heading filter ?
+ // int temp1Position = checkWhitespaces(fWhiteStartPosition,
+ // fCurrentPosition - 2);
+ // if (temp1Position >= 0) {
+ // copyWhite(fWhiteStart, fWhiteStartPosition, 1);
+ // fWhiteStart = false;
+ // int simpleHeader = getNextChar(' ', '.');
+ // if (simpleHeader < 0) {
+ // if (getNextChar('1')) {
+ // fCurrentPosition--;
+ // if (getList('1', "", "
")) {
+ // continue;
+ // }
+ // }
+ // break;
+ // }
+ // if (simpleHeader == 1 && !getNextChar('1')) {
+ // fCurrentPosition--;
+ // if (getList('1', "", "
")) {
+ // continue;
+ // }
+ // break;
+ // }
+ // temp1Position = fCurrentPosition;
+ // if (simpleHeader >= 0 && readUntilChar('\n')) {
+ // String heading = new String(fSource, temp1Position,
+ // fCurrentPosition - temp1Position - 1);
+ // if (heading != null) {
+ // fResultBuffer.append("");
+ // // System.out.println(heading);
+ // fResultBuffer
+ // .append(WikipediaFilter
+ // .filterParser(
+ // heading,
+ // fContext,
+ // WikipediaFilter.DUMMY_CACHED_PAGE,
+ // fMacros,
+ // fRecursionLevel));
+ // fResultBuffer.append("
");
+ // continue;
+ // }
+ // }
+ // }
+ // break;
+ case '*': // list
+ case '#': // list
+ if (isStartOfLine()) {
+ char[] listChars = getListChars();
+ int tempStarPosition = checkWhitespaces(fWhiteStartPosition, fCurrentPosition - 1 - listChars.length);
+ if (tempStarPosition >= 0) {
+ appendList(listChars);
+ continue;
+ }
+ }
+ break;
+ // case '#': // list
+ // if (fCurrentPosition >= 2) {
+ // char beforeChar = fSource[fCurrentPosition - 2];
+ // if (beforeChar == '\n' || beforeChar == '\r') {
+ //
+ // int levelHash = getNumberOfChar('#') + 1;
+ //
+ // int tempHashPosition = checkWhitespaces(fWhiteStartPosition, fCurrentPosition - 1 - levelHash);
+ // if (tempHashPosition >= 0) {
+ // copyWhite(fWhiteStart, fWhiteStartPosition, levelHash);
+ // fWhiteStart = false;
+ // AbstractTag tok = (AbstractTag) fTokenStack.peek();
+ // if (tok instanceof ListToken) {
+ // ListToken listToken = (ListToken) tok;
+ // int topLevel = listToken.getLevel();
+ // if (listToken.getToken() == WikipediaFilter.TokenLIST_OL_START) {
+ // if (levelHash > topLevel) {
+ // fTokenStack.push(new ListToken(WikipediaFilter.TokenLIST_OL_START, topLevel + 1));
+ // fResultBuffer.append("- ");
+ // } else if (levelHash < topLevel) {
+ // fTokenStack.pop();
+ // fResultBuffer.append("
- ");
+ // } else {
+ // fResultBuffer.append("
- ");
+ // }
+ // } else {
+ // fTokenStack.push(new ListToken(WikipediaFilter.TokenLIST_OL_START, levelHash));
+ // fResultBuffer.append("
- ");
+ // }
+ // } else {
+ // fTokenStack.push(new ListToken(WikipediaFilter.TokenLIST_OL_START, 1));
+ // fResultBuffer.append("\n
- ");
+ // }
+ // continue;
+ // }
+ // }
+ // // }
+ // }
+ // break;
+
+ // case 'i': //
list
+ // if (getList('i', "", "
")) {
+ // continue;
+ // }
+ // break;
+ // case 'I': // list
+ // if (getList('i', "", "
")) {
+ // continue;
+ // }
+ // break;
+ // case 'a' : // list
+ // if (getList('a', "", "
")) {
+ // continue;
+ // }
+ // break;
+ // case 'A' : // list
+ // if (getList('A', "", "
")) {
+ // continue;
+ // }
+ // break;
+ // case 'g' : // list
+ // if (getList('g', "", "
")) {
+ // continue;
+ // }
+ // break;
+ // case 'H' : // list
+ // if (getList('H', "", "
")) {
+ // continue;
+ // }
+ // break;
+ // case 'k' : // list
+ // if (getList('k', "", "
")) {
+ // continue;
+ // }
+ // break;
+ // case 'K' : // list
+ // if (getList('K', "", "
")) {
+ // continue;
+ // }
+ // break;
+ // case 'j' : // list
+ // if (getList('j', "", "
")) {
+ // continue;
+ // }
+ // break;
+
+ case '\'':
+ if (getNextChar('\'')) {
+ if (getNextChar('\'')) {
+ copyWhite(fWhiteStart, fWhiteStartPosition, 3);
+ fWhiteStart = false;
+ return WikipediaFilter.TokenSTRONG;
+ }
+ copyWhite(fWhiteStart, fWhiteStartPosition, 2);
+ fWhiteStart = false;
+ return WikipediaFilter.TokenEM;
+ }
+ break;
+ // case '_':
+ // if (getNextChar('_')) {
+ // copyWhite(fWhiteStart, fWhiteStartPosition, 2);
+ // fWhiteStart = false;
+ // return WikipediaFilter.TokenBOLD;
+ // }
+ // break;
+ // case '~':
+ // if (getNextChar('~')) {
+ // copyWhite(fWhiteStart, fWhiteStartPosition, 2);
+ // fWhiteStart = false;
+ // return WikipediaFilter.TokenITALIC;
+ // }
+ // break;
+ case '-':
+ int tempCurrPosition = fCurrentPosition;
+ try {
+ if (fSource[tempCurrPosition++] == '-' && fSource[tempCurrPosition++] == '-' && fSource[tempCurrPosition++] == '-') {
+ if (fSource[tempCurrPosition] == '\n') {
+ fCurrentPosition = tempCurrPosition;
+ fResultBuffer.append("
");
+ fWhiteStart = false;
+ continue;
+ } else if (fSource[tempCurrPosition++] == '\r' && fSource[tempCurrPosition++] == '\n') {
+ fCurrentPosition = tempCurrPosition - 1;
+ fResultBuffer.append("
");
+ fWhiteStart = false;
+ continue;
+ }
+ }
+ } catch (IndexOutOfBoundsException e) {
+
+ }
+
+ // int levelMinus = getNumberOfChar('-') + 1;
+ // if (getNextChar(' ')) {
+ // int tempPosition = checkWhitespaces(
+ // fWhiteStartPosition, fCurrentPosition - 2
+ // - levelMinus);
+ // if (tempPosition >= 0) {
+ // copyWhite(fWhiteStart, fWhiteStartPosition,
+ // 1 + levelMinus);
+ // fWhiteStart = false;
+ // AbstractTag tok = (AbstractTag) fTokenStack.peek();
+ // if (tok instanceof ListToken) {
+ // ListToken listToken = (ListToken) tok;
+ // int topLevel = listToken.getLevel();
+ // if (listToken.getToken() ==
+ // WikipediaFilter.TokenLIST_UL_START) {
+ // if (levelMinus > topLevel) {
+ // fTokenStack
+ // .push(new ListToken(
+ // WikipediaFilter.TokenLIST_UL_START,
+ // topLevel + 1));
+ // fResultBuffer
+ // .append("- ");
+ // } else if (levelMinus < topLevel) {
+ // fTokenStack.pop();
+ // fResultBuffer
+ // .append("
- ");
+ // } else {
+ // fResultBuffer.append("
- ");
+ // }
+ // } else {
+ // fTokenStack
+ // .push(new ListToken(
+ // WikipediaFilter.TokenLIST_UL_START,
+ // levelMinus));
+ // fResultBuffer
+ // .append("
- ");
+ // }
+ // } else {
+ // fTokenStack
+ // .push(new ListToken(
+ // WikipediaFilter.TokenLIST_UL_START,
+ // 1));
+ // fResultBuffer
+ // .append("\n
- ");
+ // }
+ // continue;
+ // }
+ // }
+ // if (levelMinus == 2) {
+ // copyWhite(fWhiteStart, fWhiteStartPosition, 2);
+ // fWhiteStart = false;
+ // return WikipediaFilter.TokenSTRIKETHROUGH;
+ // }
+ break;
+ case 'h': // http(s)://
+ int urlStartPosition = fCurrentPosition;
+ boolean foundUrl = false;
+ int diff = 7;
+ try {
+ String urlString = fStringSource.substring(fCurrentPosition - 1, fCurrentPosition + 3);
+ if (urlString.equals("http")) {
+ fCurrentPosition += 3;
+ fCurrentCharacter = fSource[fCurrentPosition++];
+ if (fCurrentCharacter == 's') { // optional
+ fCurrentCharacter = fSource[fCurrentPosition++];
+ diff++;
+ }
+
+ if (fCurrentCharacter == ':' && fSource[fCurrentPosition++] == '/' && fSource[fCurrentPosition++] == '/') {
+ copyWhite(fWhiteStart, fWhiteStartPosition, diff);
+ fWhiteStart = false;
+ foundUrl = true;
+ while (WikipediaFilter.isUrlIdentifierPart(fSource[fCurrentPosition++])) {
+ }
+ }
+ }
+ } catch (IndexOutOfBoundsException e) {
+ if (!foundUrl) {
+ // rollback work :-)
+ fCurrentPosition = urlStartPosition;
+ }
+ }
+ if (foundUrl) {
+ String urlString = new String(fSource, urlStartPosition - 1, fCurrentPosition - urlStartPosition);
+ fCurrentPosition--;
+ WikipediaFilter.createExternalLink(fResultBuffer, fWikiEngine, urlString);
+ continue;
+ }
+ break;
+
+ // case '@': // images @xml@ -> /static/rss-small.png
+ // copyWhite(fWhiteStart, fWhiteStartPosition, 1);
+ // fWhiteStart = false;
+ // int atStart = fCurrentPosition;
+ // if (readUntilChar('@')) {
+ // String imageTag = new String(fSource, atStart, fCurrentPosition - atStart - 1);
+ // if (imageTag != null) {
+ // if (WikipediaFilter.createStaticImage(imageTag, fResultBuffer)) {
+ // continue;
+ // }
+ // }
+ // }
+ // fCurrentPosition = atStart;
+ // break;
+ case '&':
+ int ampersandStart = fCurrentPosition - 1;
+ if (getNextChar('#')) {
+ try {
+ StringBuffer num = new StringBuffer(5);
+ char ch = fSource[fCurrentPosition++];
+ while (Character.isDigit(ch)) {
+ num.append(ch);
+ ch = fSource[fCurrentPosition++];
+ }
+ if (num.length() > 0 && ch == ';') {
+ Integer i = Integer.valueOf(num.toString());
+ if (i.intValue() < 65536) {
+ copyWhite(fWhiteStart, fWhiteStartPosition, 3 + num.length());
+ fWhiteStart = false;
+ fResultBuffer.append(fSource, ampersandStart, fCurrentPosition - ampersandStart);
+ continue;
+ }
+ }
+ } catch (IndexOutOfBoundsException e) {
+ // ignore exception
+ } catch (NumberFormatException e) {
+ // ignore exception
+ }
+ } else {
+ try {
+ StringBuffer entity = new StringBuffer(10);
+ char ch = fSource[fCurrentPosition++];
+ while (Character.isLetterOrDigit(ch)) {
+ entity.append(ch);
+ ch = fSource[fCurrentPosition++];
+ }
+ if (entity.length() > 0 && ch == ';') {
+ if (WikipediaFilter.ENTITY_SET.contains(entity.toString())) {
+ copyWhite(fWhiteStart, fWhiteStartPosition, 2 + entity.length());
+ fWhiteStart = false;
+ fResultBuffer.append(fSource, ampersandStart, fCurrentPosition - ampersandStart);
+ continue;
+ }
+ }
+ } catch (IndexOutOfBoundsException e) {
+ // ignore exception
+ } catch (NumberFormatException e) {
+ // ignore exception
+ }
+ }
+ break;
+ case '<':
+ int htmlStartPosition = fCurrentPosition;
+ try {
+ switch (fStringSource.charAt(fCurrentPosition)) {
+ case '!': //
+ String htmlCommentString = fStringSource.substring(fCurrentPosition - 1, fCurrentPosition + 3);
+
+ if (htmlCommentString.equals("")) {
+ String htmlCommentContent = new String(fSource, htmlStartPosition + 3, fCurrentPosition - htmlStartPosition - 6);
+ if (htmlCommentContent != null) {
+ copyWhite(fWhiteStart, fWhiteStartPosition, fCurrentPosition - htmlStartPosition + 1);
+ fWhiteStart = false;
+ // insert html comment for visual checks
+ // only:
+ /*
+ * fResultBuffer.append(" ");
+ */
+ continue;
+ }
+ }
+ }
+ break;
+ // case 'm' : // math
+ // String mathString =
+ // fStringSource.substring(fCurrentPosition - 1,
+ // fCurrentPosition + 5);
+
+ // if (mathString.equals("")) {
+ // String mathContent = new String(fSource,
+ // htmlStartPosition + 5, fCurrentPosition -
+ // htmlStartPosition - 12);
+ // if (mathContent != null) {
+ // copyWhite(fWhiteStart, fWhiteStartPosition,
+ // fCurrentPosition - htmlStartPosition + 1);
+ // fWhiteStart = false;
+ // if (startOfIndent) {
+ // startOfIndent = false;
+ // handleTeXMath(mathContent, false);
+ // } else {
+ // handleTeXMath(mathContent, true);
+ // }
+ // continue;
+ // }
+ // }
+ // }
+ // break;
+ case 'n': // nowiki
+ String nowikiString = fStringSource.substring(fCurrentPosition - 1, fCurrentPosition + 7);
+
+ if (nowikiString.equals("")) {
+ fCurrentPosition += 7;
+ if (readUntilString("")) {
+ String nowikiContent = new String(fSource, htmlStartPosition + 7, fCurrentPosition - htmlStartPosition - 16);
+ if (nowikiContent != null) {
+ copyWhite(fWhiteStart, fWhiteStartPosition, fCurrentPosition - htmlStartPosition + 1);
+ fWhiteStart = false;
+ copyNowikiNewLine(nowikiContent);
+ continue;
+ }
+ }
+ }
+ break;
+ }
+ } catch (IndexOutOfBoundsException e) {
+
+ }
+ startOfIndent = false;
+ fCurrentPosition = htmlStartPosition;
+ // detect special html tags
+ int htmlToken = getHTMLToken();
+ if (htmlToken == WikipediaFilter.TokenIgnore) {
+ continue;
+ // } else if (htmlToken > TokenIgnore) {
+ // return htmlToken;
+ }
+ fCurrentPosition = htmlStartPosition;
+ break;
+ case '=': // wikipedia header ?
+ if (isStartOfLine()) {
+ int levelHeader = getNumberOfChar('=') + 1;
+ // int tempPosition = checkWhitespaces(fWhiteStartPosition, fCurrentPosition - 1 - levelHeader);
+ // if (tempPosition >= 0) {
+ copyWhite(fWhiteStart, fWhiteStartPosition, levelHeader);
+ fWhiteStart = false;
+ int startHeadPosition = fCurrentPosition;
+ // int initialOffset = levelHeader;
+ if (levelHeader > 6) {
+ levelHeader = 6;
+ }
+ levelHeader--;
+ if (readUntilString(WikipediaFilter.HEADER_STRINGS[levelHeader])) {
+ String head = new String(fSource, startHeadPosition, fCurrentPosition - startHeadPosition - (1 + levelHeader));
+ levelHeader++;
+ handleHead(head, levelHeader);
+ continue;
+ }
+ // }
+ }
+ break;
+ }
+ if (!fWhiteStart) {
+ fWhiteStart = true;
+ fWhiteStartPosition = fCurrentPosition - 1;
+ }
+
+ startOfIndent = false;
+ }
+ // -----------------end switch while try--------------------
+ } catch (IndexOutOfBoundsException e) {
+ // end of scanner text
+ }
+ copyWhite(fWhiteStart, fWhiteStartPosition, 1);
+
+ return WikipediaFilter.TokenEOF;
+ }
+
+ /**
+ * @return
+ */
+ private boolean isStartOfLine() {
+ boolean isListStart = false;
+ if (fCurrentPosition >= 2) {
+ char beforeChar = fSource[fCurrentPosition - 2];
+ if (beforeChar == '\n' || beforeChar == '\r') {
+ isListStart = true;
+ }
+ }
+ if (fCurrentPosition == 1) {
+ isListStart = true;
+ }
+ return isListStart;
+ }
+
+ /**
+ * @param levelStar
+ * @param listChars
+ * TODO
+ */
+ private void appendList(char[] listChars) {
+ int topLevel = 0;
+ int levelStar = listChars.length;
+ copyWhite(fWhiteStart, fWhiteStartPosition, levelStar);
+ fWhiteStart = false;
+ AbstractTag tok = (AbstractTag) fTokenStack.peek();
+
+ if (tok instanceof ListToken) {
+ ListToken listToken = (ListToken) tok;
+ topLevel = listToken.getLevel();
+
+ if (levelStar > topLevel) {
+ while (levelStar > topLevel) {
+ if (listChars[topLevel] == '*') {
+ fTokenStack.push(new ListToken(WikipediaFilter.TokenLIST_UL_START, ++topLevel));
+ fResultBuffer.append("
- ");
+ } else {
+ fTokenStack.push(new ListToken(WikipediaFilter.TokenLIST_OL_START, ++topLevel));
+ fResultBuffer.append("
- ");
+ }
+ }
+ } else if (levelStar < topLevel) {
+ while (levelStar < topLevel) {
+ tok = (AbstractTag) fTokenStack.peek();
+ if (tok instanceof ListToken) {
+ fTokenStack.pop();
+ listToken = (ListToken) tok;
+ if (listToken.getToken() == WikipediaFilter.TokenLIST_UL_START) {
+ fResultBuffer.append("
- ");
+ } else {
+ fResultBuffer.append("
- ");
+ }
+ topLevel--;
+ } else {
+ break;
+ }
+ }
+ } else {
+ --topLevel;
+ if (listToken.getToken() == WikipediaFilter.TokenLIST_UL_START && listChars[topLevel] == '#') {
+ fTokenStack.pop();
+ fTokenStack.push(new ListToken(WikipediaFilter.TokenLIST_OL_START, topLevel));
+ fResultBuffer.append("
- ");
+ } else if (listToken.getToken() == WikipediaFilter.TokenLIST_OL_START && listChars[topLevel] == '*') {
+ fTokenStack.pop();
+ fTokenStack.push(new ListToken(WikipediaFilter.TokenLIST_UL_START, topLevel));
+ fResultBuffer.append("