package org.plog4u.wiki.filter;
import java.io.IOException;
import java.io.Writer;
import java.util.ArrayList;
import java.util.NoSuchElementException;
import java.util.Stack;
import java.util.StringTokenizer;
//import org.apache.commons.logging.Log;
//import org.apache.commons.logging.LogFactory;
import org.plog4u.wiki.filter.WikipediaFilter.InvalidInputException;
import org.plog4u.wiki.filter.tags.AbstractTag;
import org.plog4u.wiki.filter.tags.CloseTagToken;
import org.plog4u.wiki.filter.tags.ListToken;
import org.plog4u.wiki.filter.tags.OpenTagToken;
import org.plog4u.wiki.filter.tags.SpecialTagToken;
import org.radeox.api.engine.ImageRenderEngine;
import org.radeox.api.engine.IncludeRenderEngine;
import org.radeox.api.engine.RenderEngine;
import org.radeox.api.engine.WikiRenderEngine;
import org.radeox.filter.context.FilterContext;
import org.radeox.filter.interwiki.InterWiki;
import org.radeox.macro.Macro;
import org.radeox.macro.MacroRepository;
import org.radeox.macro.parameter.MacroParameter;
import org.radeox.util.Encoder;
import org.radeox.util.StringBufferWriter;
/**
* A parser for the WikipediaFilter
*
* @see org.plog4u.wiki.filter.WikipediaFilter
*/
public class WikipediaParser {
/** Macro repository handed to the constructor; passed on to nested {@code WikipediaFilter.filterParser} calls. */
MacroRepository fMacros;
/** Filter context supplied by the caller; the render engine is looked up through it. */
private FilterContext fContext;
/** Render engine taken from the filter context's render context; used when external links are created. */
private RenderEngine fWikiEngine;
/** Image counter, set to 1 by the constructor. */
// TODO check, if this counter is correct in recursions:
private int fImageCounter;
/**
* If the snip contains headings for a "table of content" this buffer temporarily contains the start of the snip and the
* "table of content"
*/
private StringBuffer fResultBufferHeader = null;
/**
* The buffer for the resulting HTML rendering from the current snip.
*/
private StringBuffer fResultBuffer;
/**
* The wiki syntax string which should be parsed
*/
private char[] fSource;
/**
* The corresponding String for the character source array
*/
private final String fStringSource;
/**
* The current scanned character
*/
private char fCurrentCharacter;
/**
* The current offset in the character source array
*/
private int fCurrentPosition;
/**
* The current recursion level for this parser
*/
private int fRecursionLevel;
/** Stack of the currently open tags; open HTML tag tokens and list tokens are pushed onto it while scanning. */
private Stack fTokenStack;
/** True while a run of plain text, beginning at {@link #fWhiteStartPosition}, is still waiting to be copied to the result. */
private boolean fWhiteStart = false;
/** Offset in the source at which the pending plain-text run starts; only meaningful while {@link #fWhiteStart} is true. */
private int fWhiteStartPosition = 0;
/**
* The collected "table of content" entries, or null if none have been collected.
*/
private ArrayList fTableOfContent = null;
public WikipediaParser(MacroRepository macros, String stringSource, StringBuffer result, FilterContext context, int recursionLevel) {
fContext = context;
fWikiEngine = context.getRenderContext().getRenderEngine();
// try {
// SnipMacroParameter params = (SnipMacroParameter)
// fContext.getMacroParameter();
// fSnip = params.getSnipRenderContext().getSnip();
// } catch (ClassCastException e) {
// e.printStackTrace();
// }
fMacros = macros;
fResultBuffer = result;
fStringSource = stringSource;
setSource(stringSource.toCharArray());
fRecursionLevel = recursionLevel;
fTokenStack = new Stack();
// fTableStack = new Stack();
// fTeXParser = new TeXParser("", "m:");
// fTeXImageParser = new TeXParser("", "");
fImageCounter = 1;
// fSrcPath = (String) fContext.getRenderContext().get("srcpath");
// if (fSrcPath==null) {
// fSrcPath = "";
// }
// fBinPath = (String) fContext.getRenderContext().get("binpath");
// if (fBinPath==null) {
// fBinPath = "";
// }
}
/**
 * Walks backwards from {@code endPosition} towards {@code startPosition} and checks that only blanks, tabs and carriage
 * returns are found before a new-line is reached.
 *
 * @param startPosition
 *          the lowest offset that is inspected by the backward scan
 * @param endPosition
 *          the offset at which the backward scan begins
 * @return {@code -1} if a non-whitespace character is found (or the character just before {@code startPosition} is not a
 *         new-line); the offset directly after the new-line if one is found inside the range; otherwise
 *         {@code startPosition} (which is {@code 0} at the very start of the source)
 */
private int checkWhitespaces(int startPosition, int endPosition) {
  int index = endPosition;
  while (index >= startPosition) {
    final char ch = fSource[index];
    index--;
    if (ch == '\n') {
      // index was already moved one step to the left of the new-line
      return index + 2;
    }
    final boolean blank = ch == ' ' || ch == '\t' || ch == '\r';
    if (!blank) {
      return -1;
    }
  }
  // here index is always below startPosition
  if (index >= 0) {
    // the character directly in front of the inspected range has to be a new-line
    return fSource[index] == '\n' ? startPosition : -1;
  }
  if (index == -1 && startPosition == 0) {
    // special case at the start of a string
    return 0;
  }
  return startPosition;
}
/**
* copy the content in the resulting buffer and escape special html characters (< > " & ')
*/
private void copyWhite(boolean whiteStart, final int whiteStartPosition, final int diff) {
if (whiteStart) {
final int len = fCurrentPosition - diff;
int currentIndex = whiteStartPosition;
int lastIndex = currentIndex;
while (currentIndex < len) {
switch (fSource[currentIndex++]) {
case '<': // special html escape character
if (lastIndex < (currentIndex - 1)) {
fResultBuffer.append(fSource, lastIndex, currentIndex - lastIndex - 1);
lastIndex = currentIndex;
} else {
lastIndex++;
}
fResultBuffer.append("<");
break;
case '>': // special html escape character
if (lastIndex < (currentIndex - 1)) {
fResultBuffer.append(fSource, lastIndex, currentIndex - lastIndex - 1);
lastIndex = currentIndex;
} else {
lastIndex++;
}
fResultBuffer.append(">");
break;
case '&': // special html escape character
if (lastIndex < (currentIndex - 1)) {
fResultBuffer.append(fSource, lastIndex, currentIndex - lastIndex - 1);
lastIndex = currentIndex;
} else {
lastIndex++;
}
fResultBuffer.append("&");
break;
case '\'': // special html escape character
if (lastIndex < (currentIndex - 1)) {
fResultBuffer.append(fSource, lastIndex, currentIndex - lastIndex - 1);
lastIndex = currentIndex;
} else {
lastIndex++;
}
fResultBuffer.append("'");
break;
case '\"': // special html escape character
if (lastIndex < (currentIndex - 1)) {
fResultBuffer.append(fSource, lastIndex, currentIndex - lastIndex - 1);
lastIndex = currentIndex;
} else {
lastIndex++;
}
fResultBuffer.append(""");
break;
}
}
if (lastIndex < (currentIndex)) {
fResultBuffer.append(fSource, lastIndex, currentIndex - lastIndex);
}
fWhiteStart = false;
}
}
/**
* copy the text in the resulting buffer and escape special html characters (< > " & ')
*/
private void copyWhite(String text) {
final int len = text.length();
int currentIndex = 0;
int lastIndex = currentIndex;
while (currentIndex < len) {
switch (text.charAt(currentIndex++)) {
case '<': // special html escape character
if (lastIndex < (currentIndex - 1)) {
fResultBuffer.append(text.substring(lastIndex, currentIndex - 1));
lastIndex = currentIndex;
}
fResultBuffer.append("<");
break;
case '>': // special html escape character
if (lastIndex < (currentIndex - 1)) {
fResultBuffer.append(text.substring(lastIndex, currentIndex - 1));
lastIndex = currentIndex;
} else {
lastIndex++;
}
fResultBuffer.append(">");
break;
case '&': // special html escape character
if (lastIndex < (currentIndex - 1)) {
fResultBuffer.append(text.substring(lastIndex, currentIndex - 1));
lastIndex = currentIndex;
} else {
lastIndex++;
}
fResultBuffer.append("&");
break;
case '\'': // special html escape character
if (lastIndex < (currentIndex - 1)) {
fResultBuffer.append(text.substring(lastIndex, currentIndex - 1));
lastIndex = currentIndex;
} else {
lastIndex++;
}
fResultBuffer.append("'");
break;
case '\"': // special html escape character
if (lastIndex < (currentIndex - 1)) {
fResultBuffer.append(text.substring(lastIndex, currentIndex - 1));
lastIndex = currentIndex;
} else {
lastIndex++;
}
fResultBuffer.append(""");
break;
}
}
if (lastIndex < (currentIndex)) {
fResultBuffer.append(text.substring(lastIndex, currentIndex));
}
}
/**
* Copy the text in the resulting buffer and escape special html characters (< > " & ') Additionally every
* newline will be replaced by <br/>
*/
private void copyNowikiNewLine(String text) {
final int len = text.length();
int currentIndex = 0;
int lastIndex = currentIndex;
while (currentIndex < len) {
switch (text.charAt(currentIndex++)) {
case '\n':
if (lastIndex < (currentIndex - 1)) {
fResultBuffer.append(text.substring(lastIndex, currentIndex - 1));
lastIndex = currentIndex;
} else {
lastIndex++;
}
fResultBuffer.append("
");
break;
case '<': // special html escape character
if (lastIndex < (currentIndex - 1)) {
fResultBuffer.append(text.substring(lastIndex, currentIndex - 1));
lastIndex = currentIndex;
} else {
lastIndex++;
}
fResultBuffer.append("<");
break;
case '>': // special html escape character
if (lastIndex < (currentIndex - 1)) {
fResultBuffer.append(text.substring(lastIndex, currentIndex - 1));
lastIndex = currentIndex;
} else {
lastIndex++;
}
fResultBuffer.append(">");
break;
// case '&': // special html escape character
// if (lastIndex < (currentIndex - 1)) {
// fResultBuffer.append(text.substring(lastIndex, currentIndex - 1));
// lastIndex = currentIndex;
// } else {
// lastIndex++;
// }
// fResultBuffer.append("&");
// break;
case '\'': // special html escape character
if (lastIndex < (currentIndex - 1)) {
fResultBuffer.append(text.substring(lastIndex, currentIndex - 1));
lastIndex = currentIndex;
} else {
lastIndex++;
}
fResultBuffer.append("'");
break;
case '\"': // special html escape character
if (lastIndex < (currentIndex - 1)) {
fResultBuffer.append(text.substring(lastIndex, currentIndex - 1));
lastIndex = currentIndex;
} else {
lastIndex++;
}
fResultBuffer.append(""");
break;
}
}
if (lastIndex < (currentIndex)) {
fResultBuffer.append(text.substring(lastIndex, currentIndex));
}
}
/**
 * Tries to render the HTML tag which starts at the current position (directly after the {@code '<'}), using the tags
 * registered in the {@code WikipediaFilter.OPEN_TAGS} and {@code WikipediaFilter.CLOSE_TAGS} maps.
 * <p>
 * A closing tag is only written if it matches the open tag on top of the token stack; otherwise
 * {@code unexpectedTag} is called. An opening tag is written and pushed on the token stack, except for special tags,
 * which are written completely by their own open tag string.
 *
 * @return {@code WikipediaFilter.TokenIgnore} if a known tag was handled; {@code WikipediaFilter.TokenNotFound}
 *         otherwise (the caller is expected to restore the scan position in that case)
 */
public int getHTMLToken() {
  int currentHtmlPosition = fCurrentPosition;
  try {
    if (getNextChar('/')) {
      currentHtmlPosition++;
      // closing tag
      int r = readUntilCharOrEOL('>');
      if (r != 1) {
        return WikipediaFilter.TokenNotFound;
      }
      String closeTagString = new String(fSource, currentHtmlPosition, fCurrentPosition - currentHtmlPosition - 1).toLowerCase();
      StringTokenizer tagTokenizer = new StringTokenizer(closeTagString);
      String tokenString;
      try {
        tokenString = tagTokenizer.nextToken();
        CloseTagToken token = (CloseTagToken) WikipediaFilter.CLOSE_TAGS.get(tokenString);
        if (token == null) {
          return WikipediaFilter.TokenNotFound;
        }
        // an empty stack simply means that there is no matching open tag
        Object topToken = fTokenStack.isEmpty() ? null : fTokenStack.peek();
        // compare the tag names by value, not by reference
        if (topToken instanceof OpenTagToken && ((OpenTagToken) topToken).getTagName().equals(token.getTagName())) {
          fTokenStack.pop();
          copyWhite(fWhiteStart, fWhiteStartPosition, 3 + tokenString.length());
          fWhiteStart = false;
          fResultBuffer.append(token.getCloseTag());
          return WikipediaFilter.TokenIgnore;
        }
        fWhiteStart = false;
        unexpectedTag(token.getTagName());
        return WikipediaFilter.TokenIgnore;
      } catch (NoSuchElementException e) {
        // the tag contained no name at all, e.g. "</ >"
        return WikipediaFilter.TokenNotFound;
      }
    } else {
      // opening tag
      String tokenString;
      int tagNameStart = fCurrentPosition;
      // skip the tag name; afterwards fCurrentPosition is one past the first character that ended the name
      while (Character.isJavaIdentifierStart(fSource[fCurrentPosition++])) {
      }
      try {
        tokenString = new String(fSource, tagNameStart, fCurrentPosition - tagNameStart - 1);
        OpenTagToken token = (OpenTagToken) WikipediaFilter.OPEN_TAGS.get(tokenString);
        if (token == null) {
          return WikipediaFilter.TokenNotFound;
        }
        copyWhite(fWhiteStart, fWhiteStartPosition, (fCurrentPosition - tagNameStart) + 1);
        fWhiteStart = false;
        if (token instanceof SpecialTagToken) {
          fResultBuffer.append(token.getOpenTag());
          // skip optional whitespace, an optional '/' and the closing '>'
          while (Character.isWhitespace(fSource[fCurrentPosition])) {
            fCurrentPosition++;
          }
          if (fSource[fCurrentPosition] == '/') {
            fCurrentPosition++;
          }
          if (fSource[fCurrentPosition] == '>') {
            fCurrentPosition++;
          }
        } else {
          fResultBuffer.append("<");
          fResultBuffer.append(token.getTagName());
          fTokenStack.push(token);
          // the token itself scans (and renders) the rest of the tag
          fCurrentPosition = token.scan(fResultBuffer, fSource, fCurrentPosition - 1);
          fResultBuffer.append(">");
        }
        return WikipediaFilter.TokenIgnore;
      } catch (NoSuchElementException e) {
        return WikipediaFilter.TokenNotFound;
      }
    }
  } catch (IndexOutOfBoundsException e) {
    // the end of the source was reached inside the tag: treated as "no tag found" below
  }
  fCurrentPosition = currentHtmlPosition;
  return WikipediaFilter.TokenNotFound;
}
/**
 * Consumes the next character if it equals {@code testedChar}.
 *
 * @param testedChar
 *          the expected character
 * @return true if the character matched and was consumed; false if it did not match or the end of the source was
 *         reached (the current position is unchanged in that case)
 */
public final boolean getNextChar(char testedChar) {
  final int index = fCurrentPosition;
  if (index < 0 || index >= fSource.length) {
    return false;
  }
  fCurrentCharacter = fSource[index];
  if (fCurrentCharacter == testedChar) {
    fCurrentPosition = index + 1;
    return true;
  }
  return false;
}
/**
 * Consumes the next character if it equals one of the two given characters.
 *
 * @param testedChar1
 *          the first accepted character
 * @param testedChar2
 *          the second accepted character
 * @return 0 if the next character equals {@code testedChar1}, 1 if it equals {@code testedChar2}; -1 if it matches
 *         neither or the end of the source was reached (the current position is unchanged in that case)
 */
public final int getNextChar(char testedChar1, char testedChar2) {
  final int index = fCurrentPosition;
  if (index < 0 || index >= fSource.length) {
    return -1;
  }
  fCurrentCharacter = fSource[index];
  if (fCurrentCharacter == testedChar1) {
    fCurrentPosition = index + 1;
    return 0;
  }
  if (fCurrentCharacter == testedChar2) {
    fCurrentPosition = index + 1;
    return 1;
  }
  return -1;
}
/**
 * Consumes the next character if it is a digit according to {@link Character#isDigit(char)}.
 *
 * @return true if a digit was consumed; false otherwise (the current position is unchanged in that case)
 */
public final boolean getNextCharAsDigit() {
  final int index = fCurrentPosition;
  if (index < 0 || index >= fSource.length) {
    return false;
  }
  fCurrentCharacter = fSource[index];
  if (Character.isDigit(fCurrentCharacter)) {
    fCurrentPosition = index + 1;
    return true;
  }
  return false;
}
/**
 * Consumes the next character if it is a valid digit in the given radix.
 *
 * @param radix
 *          the radix passed to {@link Character#digit(char, int)}
 * @return true if a digit was consumed; false otherwise (the current position is unchanged in that case)
 */
public final boolean getNextCharAsDigit(int radix) {
  final int index = fCurrentPosition;
  if (index < 0 || index >= fSource.length) {
    return false;
  }
  fCurrentCharacter = fSource[index];
  final boolean isDigit = Character.digit(fCurrentCharacter, radix) != -1;
  if (isDigit) {
    fCurrentPosition = index + 1;
  }
  return isDigit;
}
/**
 * Consumes all consecutive occurrences of {@code testedChar} starting at the current position.
 *
 * @param testedChar
 *          the character to count
 * @return the number of consumed characters; afterwards the current position is at the first character that differs
 *         (or at the end of the source)
 */
public final int getNumberOfChar(char testedChar) {
  int count = 0;
  while (fCurrentPosition >= 0 && fCurrentPosition < fSource.length) {
    fCurrentCharacter = fSource[fCurrentPosition];
    if (fCurrentCharacter != testedChar) {
      break;
    }
    fCurrentPosition++;
    count++;
  }
  return count;
}
/**
 * Reads the run of list characters ({@code '*'} and {@code '#'}) at the current position.
 * <p>
 * The returned array starts with the character directly before the current position (the list character that was
 * already consumed by the caller) and contains every following {@code '*'} or {@code '#'}.
 *
 * @return a copy of the list characters; afterwards the current position is at the first character that is not a list
 *         character (or at the end of the source)
 */
public final char[] getListChars() {
  final int first = fCurrentPosition - 1;
  int scan = fCurrentPosition;
  while (scan >= 0 && scan < fSource.length) {
    fCurrentCharacter = fSource[scan];
    final boolean listChar = fCurrentCharacter == '*' || fCurrentCharacter == '#';
    if (!listChar) {
      break;
    }
    scan++;
  }
  fCurrentPosition = scan;
  final int count = scan - first;
  final char[] listChars = new char[count];
  System.arraycopy(fSource, first, listChars, 0, count);
  return listChars;
}
/**
 * Consumes the next character if {@code WikipediaFilter.isWikiPluginIdentifierPart} accepts it.
 *
 * @return true if the character was accepted and consumed; false otherwise (the current position is unchanged in that
 *         case)
 */
public boolean getNextCharAsWikiPluginIdentifierPart() {
  final int start = fCurrentPosition;
  boolean accepted;
  try {
    fCurrentCharacter = fSource[fCurrentPosition++];
    accepted = WikipediaFilter.isWikiPluginIdentifierPart(fCurrentCharacter);
  } catch (IndexOutOfBoundsException e) {
    // end of the source reached
    accepted = false;
  }
  if (!accepted) {
    fCurrentPosition = start;
  }
  return accepted;
}
/**
 * Closes every list and inline formatting tag that is still open on top of the token stack.
 * <p>
 * Tokens are popped, and their closing markup is appended to the result buffer, until the stack is empty or a token of
 * another kind is found on top.
 */
// NOTE(review): the closing tag literals were empty in the reviewed source; confirm the restored markup against the
// opening tags written elsewhere in this class.
private void stopList() {
  while (!fTokenStack.isEmpty()) {
    AbstractTag tok = (AbstractTag) fTokenStack.peek();
    final String closingMarkup;
    if (tok.equals(WikipediaFilter.LIST_UL_START)) {
      closingMarkup = "</li></ul>";
    } else if (tok.equals(WikipediaFilter.LIST_OL_START)) {
      closingMarkup = "</li></ol>";
    } else if (tok == WikipediaFilter.BOLD) {
      closingMarkup = "</b>";
    } else if (tok == WikipediaFilter.ITALIC) {
      closingMarkup = "</i>";
    } else if (tok == WikipediaFilter.STRONG) {
      closingMarkup = "</strong>";
    } else if (tok == WikipediaFilter.EM) {
      closingMarkup = "</em>";
    } else if (tok == WikipediaFilter.STRIKETHROUGH) {
      closingMarkup = "</strike>";
    } else {
      // any other token ends the run of tags that are closed implicitly
      break;
    }
    fTokenStack.pop();
    fResultBuffer.append(closingMarkup);
  }
}
protected int getNextToken() throws InvalidInputException {
boolean startOfIndent = false;
fWhiteStartPosition = 0;
fWhiteStart = false;
try {
while (true) {
// fStartPosition = fCurrentPosition;
fCurrentCharacter = fSource[fCurrentPosition++];
// ---------Identify the next token-------------
switch (fCurrentCharacter) {
case '\n':
if (fWhiteStart) {
int tempPosition = checkWhitespaces(fWhiteStartPosition, fCurrentPosition - 2);
if (tempPosition >= 0) {
copyWhite(fWhiteStart, fWhiteStartPosition, fCurrentPosition - (++tempPosition));
fWhiteStart = false;
stopList();
fResultBuffer.append("
");
// continue;
}
}
int fStartPrePosition = fCurrentPosition;
boolean preSection = false;
try {
while (fSource[fCurrentPosition++] == ' ') {
fCurrentCharacter = fSource[fCurrentPosition++];
while (fCurrentCharacter != '\n') {
if (!Character.isWhitespace(fCurrentCharacter)) {
// preformatted section starts here
preSection = true;
}
fCurrentCharacter = fSource[fCurrentPosition++];
}
}
--fCurrentPosition;
} catch (IndexOutOfBoundsException e) {
}
if (preSection && fRecursionLevel == 1) {
String preString;
copyWhite(fWhiteStart, fStartPrePosition, fCurrentPosition - fStartPrePosition);
fWhiteStart = true;
fResultBuffer.append("");
// copyWhite(fWhiteStart, fStartPrePosition, 1);
preString = new String(fSource, fStartPrePosition, fCurrentPosition - fStartPrePosition - 1) + '\n';
fResultBuffer.append(WikipediaFilter.filterParser(preString, fContext, fMacros, fRecursionLevel));
// preString = new String(fSource, fStartPrePosition, fCurrentPosition - fStartPrePosition - 1)+'\n';
// int preIndex = 0;
// int lastIndex = 0;
// while (preIndex>=0) {
// preIndex = preString.indexOf('\n', lastIndex);
// if (preIndex>=0) {
// fResultBuffer.append(WikipediaFilter.filterParser(preString.substring(lastIndex,preIndex), fContext,
// fCachedPage, fMacros, fRecursionLevel));
// fResultBuffer.append('\n');
// lastIndex = ++preIndex;
// }
// }
fResultBuffer.append("
");
fWhiteStart = false;
continue;
} else {
fCurrentPosition = fStartPrePosition;
}
break;
case ':':
if (isStartOfLine()) {
copyWhite(fWhiteStart, fWhiteStartPosition, 1);
fWhiteStart = false;
int levelHeader = getNumberOfChar(':') + 1;
int startHeadPosition = fCurrentPosition;
if (readUntilEOL()) {
String head = new String(fSource, startHeadPosition, fCurrentPosition - startHeadPosition);
for (int i = 0; i < levelHeader; i++) {
fResultBuffer.append("- ");
}
fResultBuffer.append(head);
for (int i = 0; i < levelHeader; i++) {
fResultBuffer.append("
");
}
continue;
}
continue;
}
break;
case ';':
if (isStartOfLine() && getNextChar(' ')) {
copyWhite(fWhiteStart, fWhiteStartPosition, 1);
fWhiteStart = false;
int startHeadPosition = fCurrentPosition;
if (readUntilEOL()) {
// TODO not correct - improve this
String head = new String(fSource, startHeadPosition, fCurrentPosition - startHeadPosition);
int index = head.indexOf(": ");
if (index > 0) {
fResultBuffer.append("- ");
fResultBuffer.append(head.substring(0,index));
fResultBuffer.append("
- ");
fResultBuffer.append(head.substring(index+2));
fResultBuffer.append("
");
} else {
fResultBuffer.append("- ");
fResultBuffer.append(head);
fResultBuffer.append("
");
}
continue;
}
continue;
}
break;
// case '\\': // special characters follow
// copyWhite(fWhiteStart, fWhiteStartPosition, 1);
// fWhiteStart = false;
// try {
// fCurrentCharacter = fSource[fCurrentPosition++];
// switch (fCurrentCharacter) {
// case '\\': // newline
// if ((fCurrentCharacter = fSource[fCurrentPosition++]) == '\\') {
// fResultBuffer.append(Encoder
// .toEntity(fCurrentCharacter));
// break;
// } else {
// fResultBuffer.append("
");
// break;
// }
// default:
// fResultBuffer.append(Encoder
// .toEntity(fCurrentCharacter));
// }
// } catch (IndexOutOfBoundsException e) {
//
// }
// continue;
// case '$' : // detect tex math
// copyWhite(fWhiteStart, fWhiteStartPosition, 1);
// fWhiteStart = false;
// startOfIndent = false;
// int startMathPosition = fCurrentPosition;
// if (getNextChar('$')) {
// startMathPosition = fCurrentPosition;
// copyWhite(fWhiteStart, fWhiteStartPosition, 2);
// fWhiteStart = false;
// if (readUntilString("$$")) {
// String mathContent = new String(fSource, startMathPosition,
// fCurrentPosition - startMathPosition - 2);
// if (mathContent != null) {
// handleTeXMath(mathContent, false);
// continue;
// }
// }
// } else {
// if (readUntilChar('$')) {
// String mathContent = new String(fSource, startMathPosition,
// fCurrentPosition - startMathPosition - 1);
// if (mathContent != null) {
// handleTeXMath(mathContent, true);
// continue;
// }
// }
// }
// break;
case '{':
// detect macros
copyWhite(fWhiteStart, fWhiteStartPosition, 1);
fWhiteStart = false;
// boolean scanBody = true;
int startMacroPosition = fCurrentPosition;
if (getNextChar('|') && handleWikipediaTable()) { // Wikipedia
// table
// syntax
continue;
} else {
if (readUntilChar('}')) {
String macroStartTag;
macroStartTag = new String(fSource, startMacroPosition, fCurrentPosition - startMacroPosition - 1);
if (macroStartTag != null) {
createMacro(startMacroPosition, macroStartTag);
continue;
}
}
}
break;
case '[':
int startLinkPosition = fCurrentPosition;
if (getNextChar('[')) { // wikipedia link style
startLinkPosition = fCurrentPosition;
copyWhite(fWhiteStart, fWhiteStartPosition, 2);
fWhiteStart = false;
if (readUntilString("]]")) {
String name = new String(fSource, startLinkPosition, fCurrentPosition - startLinkPosition - 2);
// test for suffix string
int temp = fCurrentPosition;
StringBuffer suffixBuffer = new StringBuffer();
try {
while (true) {
fCurrentCharacter = fSource[fCurrentPosition++];
if (!Character.isLetterOrDigit(fCurrentCharacter)) {
fCurrentPosition--;
break;
}
suffixBuffer.append(fCurrentCharacter);
}
handleWikipediaLink(name, suffixBuffer.toString());
continue;
} catch (IndexOutOfBoundsException e) {
fCurrentPosition = temp;
}
handleWikipediaLink(name, "");
continue;
}
} else {
copyWhite(fWhiteStart, fWhiteStartPosition, 1);
fWhiteStart = false;
if (readUntilChar(']')) {
String name = new String(fSource, startLinkPosition, fCurrentPosition - startLinkPosition - 1);
handleSnipLink(name);
continue;
}
}
break;
// case '1': // heading filter ?
// int temp1Position = checkWhitespaces(fWhiteStartPosition,
// fCurrentPosition - 2);
// if (temp1Position >= 0) {
// copyWhite(fWhiteStart, fWhiteStartPosition, 1);
// fWhiteStart = false;
// int simpleHeader = getNextChar(' ', '.');
// if (simpleHeader < 0) {
// if (getNextChar('1')) {
// fCurrentPosition--;
// if (getList('1', "", "
")) {
// continue;
// }
// }
// break;
// }
// if (simpleHeader == 1 && !getNextChar('1')) {
// fCurrentPosition--;
// if (getList('1', "", "
")) {
// continue;
// }
// break;
// }
// temp1Position = fCurrentPosition;
// if (simpleHeader >= 0 && readUntilChar('\n')) {
// String heading = new String(fSource, temp1Position,
// fCurrentPosition - temp1Position - 1);
// if (heading != null) {
// fResultBuffer.append("");
// // System.out.println(heading);
// fResultBuffer
// .append(WikipediaFilter
// .filterParser(
// heading,
// fContext,
// WikipediaFilter.DUMMY_CACHED_PAGE,
// fMacros,
// fRecursionLevel));
// fResultBuffer.append("
");
// continue;
// }
// }
// }
// break;
case '*': // list
case '#': // list
if (isStartOfLine()) {
char[] listChars = getListChars();
int tempStarPosition = checkWhitespaces(fWhiteStartPosition, fCurrentPosition - 1 - listChars.length);
if (tempStarPosition >= 0) {
appendList(listChars);
continue;
}
}
break;
// case '#': // list
// if (fCurrentPosition >= 2) {
// char beforeChar = fSource[fCurrentPosition - 2];
// if (beforeChar == '\n' || beforeChar == '\r') {
//
// int levelHash = getNumberOfChar('#') + 1;
//
// int tempHashPosition = checkWhitespaces(fWhiteStartPosition, fCurrentPosition - 1 - levelHash);
// if (tempHashPosition >= 0) {
// copyWhite(fWhiteStart, fWhiteStartPosition, levelHash);
// fWhiteStart = false;
// AbstractTag tok = (AbstractTag) fTokenStack.peek();
// if (tok instanceof ListToken) {
// ListToken listToken = (ListToken) tok;
// int topLevel = listToken.getLevel();
// if (listToken.getToken() == WikipediaFilter.TokenLIST_OL_START) {
// if (levelHash > topLevel) {
// fTokenStack.push(new ListToken(WikipediaFilter.TokenLIST_OL_START, topLevel + 1));
// fResultBuffer.append("- ");
// } else if (levelHash < topLevel) {
// fTokenStack.pop();
// fResultBuffer.append("
- ");
// } else {
// fResultBuffer.append("
- ");
// }
// } else {
// fTokenStack.push(new ListToken(WikipediaFilter.TokenLIST_OL_START, levelHash));
// fResultBuffer.append("
- ");
// }
// } else {
// fTokenStack.push(new ListToken(WikipediaFilter.TokenLIST_OL_START, 1));
// fResultBuffer.append("\n
- ");
// }
// continue;
// }
// }
// // }
// }
// break;
// case 'i': //
list
// if (getList('i', "", "
")) {
// continue;
// }
// break;
// case 'I': // list
// if (getList('i', "", "
")) {
// continue;
// }
// break;
// case 'a' : // list
// if (getList('a', "", "
")) {
// continue;
// }
// break;
// case 'A' : // list
// if (getList('A', "", "
")) {
// continue;
// }
// break;
// case 'g' : // list
// if (getList('g', "", "
")) {
// continue;
// }
// break;
// case 'H' : // list
// if (getList('H', "", "
")) {
// continue;
// }
// break;
// case 'k' : // list
// if (getList('k', "", "
")) {
// continue;
// }
// break;
// case 'K' : // list
// if (getList('K', "", "
")) {
// continue;
// }
// break;
// case 'j' : // list
// if (getList('j', "", "
")) {
// continue;
// }
// break;
case '\'':
if (getNextChar('\'')) {
if (getNextChar('\'')) {
copyWhite(fWhiteStart, fWhiteStartPosition, 3);
fWhiteStart = false;
return WikipediaFilter.TokenSTRONG;
}
copyWhite(fWhiteStart, fWhiteStartPosition, 2);
fWhiteStart = false;
return WikipediaFilter.TokenEM;
}
break;
// case '_':
// if (getNextChar('_')) {
// copyWhite(fWhiteStart, fWhiteStartPosition, 2);
// fWhiteStart = false;
// return WikipediaFilter.TokenBOLD;
// }
// break;
// case '~':
// if (getNextChar('~')) {
// copyWhite(fWhiteStart, fWhiteStartPosition, 2);
// fWhiteStart = false;
// return WikipediaFilter.TokenITALIC;
// }
// break;
case '-':
int tempCurrPosition = fCurrentPosition;
try {
if (fSource[tempCurrPosition++] == '-' && fSource[tempCurrPosition++] == '-' && fSource[tempCurrPosition++] == '-') {
if (fSource[tempCurrPosition] == '\n') {
fCurrentPosition = tempCurrPosition;
fResultBuffer.append("
");
fWhiteStart = false;
continue;
} else if (fSource[tempCurrPosition++] == '\r' && fSource[tempCurrPosition++] == '\n') {
fCurrentPosition = tempCurrPosition - 1;
fResultBuffer.append("
");
fWhiteStart = false;
continue;
}
}
} catch (IndexOutOfBoundsException e) {
}
// int levelMinus = getNumberOfChar('-') + 1;
// if (getNextChar(' ')) {
// int tempPosition = checkWhitespaces(
// fWhiteStartPosition, fCurrentPosition - 2
// - levelMinus);
// if (tempPosition >= 0) {
// copyWhite(fWhiteStart, fWhiteStartPosition,
// 1 + levelMinus);
// fWhiteStart = false;
// AbstractTag tok = (AbstractTag) fTokenStack.peek();
// if (tok instanceof ListToken) {
// ListToken listToken = (ListToken) tok;
// int topLevel = listToken.getLevel();
// if (listToken.getToken() ==
// WikipediaFilter.TokenLIST_UL_START) {
// if (levelMinus > topLevel) {
// fTokenStack
// .push(new ListToken(
// WikipediaFilter.TokenLIST_UL_START,
// topLevel + 1));
// fResultBuffer
// .append("- ");
// } else if (levelMinus < topLevel) {
// fTokenStack.pop();
// fResultBuffer
// .append("
- ");
// } else {
// fResultBuffer.append("
- ");
// }
// } else {
// fTokenStack
// .push(new ListToken(
// WikipediaFilter.TokenLIST_UL_START,
// levelMinus));
// fResultBuffer
// .append("
- ");
// }
// } else {
// fTokenStack
// .push(new ListToken(
// WikipediaFilter.TokenLIST_UL_START,
// 1));
// fResultBuffer
// .append("\n
- ");
// }
// continue;
// }
// }
// if (levelMinus == 2) {
// copyWhite(fWhiteStart, fWhiteStartPosition, 2);
// fWhiteStart = false;
// return WikipediaFilter.TokenSTRIKETHROUGH;
// }
break;
case 'h': // http(s)://
int urlStartPosition = fCurrentPosition;
boolean foundUrl = false;
int diff = 7;
try {
String urlString = fStringSource.substring(fCurrentPosition - 1, fCurrentPosition + 3);
if (urlString.equals("http")) {
fCurrentPosition += 3;
fCurrentCharacter = fSource[fCurrentPosition++];
if (fCurrentCharacter == 's') { // optional
fCurrentCharacter = fSource[fCurrentPosition++];
diff++;
}
if (fCurrentCharacter == ':' && fSource[fCurrentPosition++] == '/' && fSource[fCurrentPosition++] == '/') {
copyWhite(fWhiteStart, fWhiteStartPosition, diff);
fWhiteStart = false;
foundUrl = true;
while (WikipediaFilter.isUrlIdentifierPart(fSource[fCurrentPosition++])) {
}
}
}
} catch (IndexOutOfBoundsException e) {
if (!foundUrl) {
// rollback work :-)
fCurrentPosition = urlStartPosition;
}
}
if (foundUrl) {
String urlString = new String(fSource, urlStartPosition - 1, fCurrentPosition - urlStartPosition);
fCurrentPosition--;
WikipediaFilter.createExternalLink(fResultBuffer, fWikiEngine, urlString);
continue;
}
break;
// case '@': // images @xml@ -> /static/rss-small.png
// copyWhite(fWhiteStart, fWhiteStartPosition, 1);
// fWhiteStart = false;
// int atStart = fCurrentPosition;
// if (readUntilChar('@')) {
// String imageTag = new String(fSource, atStart, fCurrentPosition - atStart - 1);
// if (imageTag != null) {
// if (WikipediaFilter.createStaticImage(imageTag, fResultBuffer)) {
// continue;
// }
// }
// }
// fCurrentPosition = atStart;
// break;
case '&':
int ampersandStart = fCurrentPosition - 1;
if (getNextChar('#')) {
try {
StringBuffer num = new StringBuffer(5);
char ch = fSource[fCurrentPosition++];
while (Character.isDigit(ch)) {
num.append(ch);
ch = fSource[fCurrentPosition++];
}
if (num.length() > 0 && ch == ';') {
Integer i = Integer.valueOf(num.toString());
if (i.intValue() < 65536) {
copyWhite(fWhiteStart, fWhiteStartPosition, 3 + num.length());
fWhiteStart = false;
fResultBuffer.append(fSource, ampersandStart, fCurrentPosition - ampersandStart);
continue;
}
}
} catch (IndexOutOfBoundsException e) {
// ignore exception
} catch (NumberFormatException e) {
// ignore exception
}
} else {
try {
StringBuffer entity = new StringBuffer(10);
char ch = fSource[fCurrentPosition++];
while (Character.isLetterOrDigit(ch)) {
entity.append(ch);
ch = fSource[fCurrentPosition++];
}
if (entity.length() > 0 && ch == ';') {
if (WikipediaFilter.ENTITY_SET.contains(entity.toString())) {
copyWhite(fWhiteStart, fWhiteStartPosition, 2 + entity.length());
fWhiteStart = false;
fResultBuffer.append(fSource, ampersandStart, fCurrentPosition - ampersandStart);
continue;
}
}
} catch (IndexOutOfBoundsException e) {
// ignore exception
} catch (NumberFormatException e) {
// ignore exception
}
}
break;
case '<':
int htmlStartPosition = fCurrentPosition;
try {
switch (fStringSource.charAt(fCurrentPosition)) {
case '!': //
String htmlCommentString = fStringSource.substring(fCurrentPosition - 1, fCurrentPosition + 3);
if (htmlCommentString.equals("")) {
String htmlCommentContent = new String(fSource, htmlStartPosition + 3, fCurrentPosition - htmlStartPosition - 6);
if (htmlCommentContent != null) {
copyWhite(fWhiteStart, fWhiteStartPosition, fCurrentPosition - htmlStartPosition + 1);
fWhiteStart = false;
// insert html comment for visual checks
// only:
/*
* fResultBuffer.append(" ");
*/
continue;
}
}
}
break;
// case 'm' : // math
// String mathString =
// fStringSource.substring(fCurrentPosition - 1,
// fCurrentPosition + 5);
// if (mathString.equals("")) {
// String mathContent = new String(fSource,
// htmlStartPosition + 5, fCurrentPosition -
// htmlStartPosition - 12);
// if (mathContent != null) {
// copyWhite(fWhiteStart, fWhiteStartPosition,
// fCurrentPosition - htmlStartPosition + 1);
// fWhiteStart = false;
// if (startOfIndent) {
// startOfIndent = false;
// handleTeXMath(mathContent, false);
// } else {
// handleTeXMath(mathContent, true);
// }
// continue;
// }
// }
// }
// break;
case 'n': // nowiki
String nowikiString = fStringSource.substring(fCurrentPosition - 1, fCurrentPosition + 7);
if (nowikiString.equals("")) {
fCurrentPosition += 7;
if (readUntilString("")) {
String nowikiContent = new String(fSource, htmlStartPosition + 7, fCurrentPosition - htmlStartPosition - 16);
if (nowikiContent != null) {
copyWhite(fWhiteStart, fWhiteStartPosition, fCurrentPosition - htmlStartPosition + 1);
fWhiteStart = false;
copyNowikiNewLine(nowikiContent);
continue;
}
}
}
break;
}
} catch (IndexOutOfBoundsException e) {
}
startOfIndent = false;
fCurrentPosition = htmlStartPosition;
// detect special html tags
int htmlToken = getHTMLToken();
if (htmlToken == WikipediaFilter.TokenIgnore) {
continue;
// } else if (htmlToken > TokenIgnore) {
// return htmlToken;
}
fCurrentPosition = htmlStartPosition;
break;
case '=': // wikipedia header ?
if (isStartOfLine()) {
int levelHeader = getNumberOfChar('=') + 1;
// int tempPosition = checkWhitespaces(fWhiteStartPosition, fCurrentPosition - 1 - levelHeader);
// if (tempPosition >= 0) {
copyWhite(fWhiteStart, fWhiteStartPosition, levelHeader);
fWhiteStart = false;
int startHeadPosition = fCurrentPosition;
// int initialOffset = levelHeader;
if (levelHeader > 6) {
levelHeader = 6;
}
levelHeader--;
if (readUntilString(WikipediaFilter.HEADER_STRINGS[levelHeader])) {
String head = new String(fSource, startHeadPosition, fCurrentPosition - startHeadPosition - (1 + levelHeader));
levelHeader++;
handleHead(head, levelHeader);
continue;
}
// }
}
break;
}
if (!fWhiteStart) {
fWhiteStart = true;
fWhiteStartPosition = fCurrentPosition - 1;
}
startOfIndent = false;
}
// -----------------end switch while try--------------------
} catch (IndexOutOfBoundsException e) {
// end of scanner text
}
copyWhite(fWhiteStart, fWhiteStartPosition, 1);
return WikipediaFilter.TokenEOF;
}
/**
 * Checks whether the character that was scanned last (at offset {@code fCurrentPosition - 1}) is the first character
 * of a line.
 *
 * @return true if that character is the first character of the source or directly follows a {@code '\n'} or
 *         {@code '\r'}; false otherwise
 */
private boolean isStartOfLine() {
  if (fCurrentPosition == 1) {
    // first character of the source
    return true;
  }
  if (fCurrentPosition < 2) {
    return false;
  }
  final char previous = fSource[fCurrentPosition - 2];
  return previous == '\n' || previous == '\r';
}
/**
* @param levelStar
* @param listChars
* TODO
*/
private void appendList(char[] listChars) {
int topLevel = 0;
int levelStar = listChars.length;
copyWhite(fWhiteStart, fWhiteStartPosition, levelStar);
fWhiteStart = false;
AbstractTag tok = (AbstractTag) fTokenStack.peek();
if (tok instanceof ListToken) {
ListToken listToken = (ListToken) tok;
topLevel = listToken.getLevel();
if (levelStar > topLevel) {
while (levelStar > topLevel) {
if (listChars[topLevel] == '*') {
fTokenStack.push(new ListToken(WikipediaFilter.TokenLIST_UL_START, ++topLevel));
fResultBuffer.append("
- ");
} else {
fTokenStack.push(new ListToken(WikipediaFilter.TokenLIST_OL_START, ++topLevel));
fResultBuffer.append("
- ");
}
}
} else if (levelStar < topLevel) {
while (levelStar < topLevel) {
tok = (AbstractTag) fTokenStack.peek();
if (tok instanceof ListToken) {
fTokenStack.pop();
listToken = (ListToken) tok;
if (listToken.getToken() == WikipediaFilter.TokenLIST_UL_START) {
fResultBuffer.append("
- ");
} else {
fResultBuffer.append("
- ");
}
topLevel--;
} else {
break;
}
}
} else {
--topLevel;
if (listToken.getToken() == WikipediaFilter.TokenLIST_UL_START && listChars[topLevel] == '#') {
fTokenStack.pop();
fTokenStack.push(new ListToken(WikipediaFilter.TokenLIST_OL_START, topLevel));
fResultBuffer.append("
- ");
} else if (listToken.getToken() == WikipediaFilter.TokenLIST_OL_START && listChars[topLevel] == '*') {
fTokenStack.pop();
fTokenStack.push(new ListToken(WikipediaFilter.TokenLIST_UL_START, topLevel));
fResultBuffer.append("