package org.plog4u.wiki.filter;
-import java.io.IOException;
-import java.io.Writer;
import java.util.ArrayList;
import java.util.NoSuchElementException;
import java.util.Stack;
import java.util.StringTokenizer;
-//import org.apache.commons.logging.Log;
-//import org.apache.commons.logging.LogFactory;
import org.plog4u.wiki.filter.WikipediaFilter.InvalidInputException;
import org.plog4u.wiki.filter.tags.AbstractTag;
import org.plog4u.wiki.filter.tags.CloseTagToken;
import org.radeox.api.engine.RenderEngine;
import org.radeox.api.engine.WikiRenderEngine;
import org.radeox.filter.context.FilterContext;
-import org.radeox.filter.interwiki.InterWiki;
import org.radeox.macro.Macro;
import org.radeox.macro.MacroRepository;
import org.radeox.macro.parameter.MacroParameter;
-import org.radeox.util.Encoder;
import org.radeox.util.StringBufferWriter;
/**
// private String fSrcPath;
// private String fBinPath;
-
public WikipediaParser(MacroRepository macros, String stringSource, StringBuffer result, FilterContext context, int recursionLevel) {
fContext = context;
fWikiEngine = context.getRenderContext().getRenderEngine();
} else {
lastIndex++;
}
- fResultBuffer.append("<");
+ fResultBuffer.append("<");
break;
case '>': // special html escape character
if (lastIndex < (currentIndex - 1)) {
} else {
lastIndex++;
}
- fResultBuffer.append(">");
+ fResultBuffer.append(">");
break;
case '&': // special html escape character
if (lastIndex < (currentIndex - 1)) {
} else {
lastIndex++;
}
- fResultBuffer.append("&");
+ fResultBuffer.append("&");
break;
case '\'': // special html escape character
if (lastIndex < (currentIndex - 1)) {
} else {
lastIndex++;
}
- fResultBuffer.append(""");
+ fResultBuffer.append(""");
break;
}
}
fResultBuffer.append(text.substring(lastIndex, currentIndex - 1));
lastIndex = currentIndex;
}
- fResultBuffer.append("<");
+ fResultBuffer.append("<");
break;
case '>': // special html escape character
if (lastIndex < (currentIndex - 1)) {
} else {
lastIndex++;
}
- fResultBuffer.append(">");
+ fResultBuffer.append(">");
break;
case '&': // special html escape character
if (lastIndex < (currentIndex - 1)) {
} else {
lastIndex++;
}
- fResultBuffer.append("&");
+ fResultBuffer.append("&");
break;
case '\'': // special html escape character
if (lastIndex < (currentIndex - 1)) {
} else {
lastIndex++;
}
- fResultBuffer.append(""");
+ fResultBuffer.append(""");
break;
}
}
char closeCharacter;
char nextCharacter;
if (getNextChar('/')) {
+ // end tag detected
currentHtmlPosition++;
// closing tag
int r = readUntilCharOrEOL('>');
}
} else {
- // opening tag
+ // start tag
String tokenString;
int tagNameStart = fCurrentPosition;
int tokenLength = 0;
- while (Character.isJavaIdentifierStart(fSource[fCurrentPosition++])) {
+ while (Character.isJavaIdentifierStart(fSource[fCurrentPosition])) {
+ fCurrentPosition++;
tokenLength++;
}
try {
- tokenString = new String(fSource, tagNameStart, fCurrentPosition - tagNameStart - 1); //tagTokenizer.nextToken();
+ tokenString = new String(fSource, tagNameStart, fCurrentPosition - tagNameStart); //tagTokenizer.nextToken();
+
OpenTagToken token = (OpenTagToken) WikipediaFilter.OPEN_TAGS.get(tokenString);
if (token == null) {
return WikipediaFilter.TokenNotFound;
}
copyWhite(fWhiteStart, fWhiteStartPosition, (fCurrentPosition - tagNameStart) + 1);
fWhiteStart = false;
-
if (token instanceof SpecialTagToken) {
- fResultBuffer.append(token.getOpenTag());
+ // for <br> <br/> <br /> <hr> <hr/>
+
while (Character.isWhitespace(fSource[fCurrentPosition])) {
fCurrentPosition++;
}
}
if (fSource[fCurrentPosition] == '>') {
fCurrentPosition++;
+ fWhiteStartPosition = fCurrentPosition;
+ // insert the special tag :
+ fResultBuffer.append(token.getOpenTag());
+ return WikipediaFilter.TokenIgnore;
}
+
} else if (token instanceof OpenTagToken) {
fResultBuffer.append("<");
fResultBuffer.append(token.getTagName());
fTokenStack.push(token);
- fCurrentPosition = token.scan(fResultBuffer, fSource, fCurrentPosition - 1);
+ fCurrentPosition = token.scan(fResultBuffer, fSource, fCurrentPosition);
fResultBuffer.append(">");
+ return WikipediaFilter.TokenIgnore;
}
-
- // System.out.println(fResultBuffer);
- return WikipediaFilter.TokenIgnore;
+ return WikipediaFilter.TokenNotFound;
} catch (NoSuchElementException e) {
return WikipediaFilter.TokenNotFound;
}
copyWhite(fWhiteStart, fWhiteStartPosition, 1);
fWhiteStart = false;
- int startHeadPosition = fCurrentPosition;
+ int startHeadPosition = fCurrentPosition - 1;
if (readUntilEOL()) {
// TODO not correct - improve this
String head = new String(fSource, startHeadPosition, fCurrentPosition - startHeadPosition);
- int index = head.indexOf(": ");
+ int index = head.indexOf(" : ");
if (index > 0) {
fResultBuffer.append("<dl><dt>");
- fResultBuffer.append(head.substring(0,index));
- fResultBuffer.append("</dt><dd>");
- fResultBuffer.append(head.substring(index+2));
+ fResultBuffer.append(head.substring(0, index));
+ fResultBuffer.append(" </dt><dd>");
+ fResultBuffer.append(head.substring(index + 2));
fResultBuffer.append("</dd></dl>");
} else {
fResultBuffer.append("<dl><dt>");
fResultBuffer.append(head);
- fResultBuffer.append("</dt></dl>");
+ fResultBuffer.append(" </dt></dl>");
}
continue;
}
continue;
}
break;
- // case '\\': // special characters follow
- // copyWhite(fWhiteStart, fWhiteStartPosition, 1);
- // fWhiteStart = false;
- // try {
- // fCurrentCharacter = fSource[fCurrentPosition++];
- // switch (fCurrentCharacter) {
- // case '\\': // newline
- // if ((fCurrentCharacter = fSource[fCurrentPosition++]) == '\\') {
- // fResultBuffer.append(Encoder
- // .toEntity(fCurrentCharacter));
- // break;
- // } else {
- // fResultBuffer.append("<br />");
- // break;
- // }
- // default:
- // fResultBuffer.append(Encoder
- // .toEntity(fCurrentCharacter));
- // }
- // } catch (IndexOutOfBoundsException e) {
- //
- // }
- // continue;
- // case '$' : // detect tex math
- // copyWhite(fWhiteStart, fWhiteStartPosition, 1);
- // fWhiteStart = false;
- // startOfIndent = false;
- // int startMathPosition = fCurrentPosition;
- // if (getNextChar('$')) {
- // startMathPosition = fCurrentPosition;
- // copyWhite(fWhiteStart, fWhiteStartPosition, 2);
- // fWhiteStart = false;
- // if (readUntilString("$$")) {
- // String mathContent = new String(fSource, startMathPosition,
- // fCurrentPosition - startMathPosition - 2);
- // if (mathContent != null) {
- // handleTeXMath(mathContent, false);
- // continue;
- // }
- // }
- // } else {
- // if (readUntilChar('$')) {
- // String mathContent = new String(fSource, startMathPosition,
- // fCurrentPosition - startMathPosition - 1);
- // if (mathContent != null) {
- // handleTeXMath(mathContent, true);
- // continue;
- // }
- // }
- // }
- // break;
- case '{':
- // detect macros
- copyWhite(fWhiteStart, fWhiteStartPosition, 1);
- fWhiteStart = false;
- // boolean scanBody = true;
- int startMacroPosition = fCurrentPosition;
- if (getNextChar('|') && handleWikipediaTable()) { // Wikipedia
- // table
- // syntax
- continue;
- } else {
- if (readUntilChar('}')) {
- String macroStartTag;
-
- macroStartTag = new String(fSource, startMacroPosition, fCurrentPosition - startMacroPosition - 1);
- if (macroStartTag != null) {
- createMacro(startMacroPosition, macroStartTag);
- continue;
- }
- }
- }
- break;
case '[':
int startLinkPosition = fCurrentPosition;
if (getNextChar('[')) { // wikipedia link style
}
}
break;
- // case '1': // heading filter ?
- // int temp1Position = checkWhitespaces(fWhiteStartPosition,
- // fCurrentPosition - 2);
- // if (temp1Position >= 0) {
- // copyWhite(fWhiteStart, fWhiteStartPosition, 1);
- // fWhiteStart = false;
- // int simpleHeader = getNextChar(' ', '.');
- // if (simpleHeader < 0) {
- // if (getNextChar('1')) {
- // fCurrentPosition--;
- // if (getList('1', "<ol>", "</ol>")) {
- // continue;
- // }
- // }
- // break;
- // }
- // if (simpleHeader == 1 && !getNextChar('1')) {
- // fCurrentPosition--;
- // if (getList('1', "<ol>", "</ol>")) {
- // continue;
- // }
- // break;
- // }
- // temp1Position = fCurrentPosition;
- // if (simpleHeader >= 0 && readUntilChar('\n')) {
- // String heading = new String(fSource, temp1Position,
- // fCurrentPosition - temp1Position - 1);
- // if (heading != null) {
- // fResultBuffer.append("<h3 class=\"heading-");
- // if (simpleHeader == 1) {
- // fResultBuffer.append("1");
- // } else {
- // fResultBuffer.append("1-1");
- // }
- // fResultBuffer.append("\">");
- // // System.out.println(heading);
- // fResultBuffer
- // .append(WikipediaFilter
- // .filterParser(
- // heading,
- // fContext,
- // WikipediaFilter.DUMMY_CACHED_PAGE,
- // fMacros,
- // fRecursionLevel));
- // fResultBuffer.append("</h3>");
- // continue;
- // }
- // }
- // }
- // break;
case '*': // <ul> list
case '#': // <ol> list
if (isStartOfLine()) {
}
}
break;
- // case '#': // <ol> list
- // if (fCurrentPosition >= 2) {
- // char beforeChar = fSource[fCurrentPosition - 2];
- // if (beforeChar == '\n' || beforeChar == '\r') {
- //
- // int levelHash = getNumberOfChar('#') + 1;
- //
- // int tempHashPosition = checkWhitespaces(fWhiteStartPosition, fCurrentPosition - 1 - levelHash);
- // if (tempHashPosition >= 0) {
- // copyWhite(fWhiteStart, fWhiteStartPosition, levelHash);
- // fWhiteStart = false;
- // AbstractTag tok = (AbstractTag) fTokenStack.peek();
- // if (tok instanceof ListToken) {
- // ListToken listToken = (ListToken) tok;
- // int topLevel = listToken.getLevel();
- // if (listToken.getToken() == WikipediaFilter.TokenLIST_OL_START) {
- // if (levelHash > topLevel) {
- // fTokenStack.push(new ListToken(WikipediaFilter.TokenLIST_OL_START, topLevel + 1));
- // fResultBuffer.append("<ol><li>");
- // } else if (levelHash < topLevel) {
- // fTokenStack.pop();
- // fResultBuffer.append("</li></ol></li><li>");
- // } else {
- // fResultBuffer.append("</li><li>");
- // }
- // } else {
- // fTokenStack.push(new ListToken(WikipediaFilter.TokenLIST_OL_START, levelHash));
- // fResultBuffer.append("<ol><li>");
- // }
- // } else {
- // fTokenStack.push(new ListToken(WikipediaFilter.TokenLIST_OL_START, 1));
- // fResultBuffer.append("\n<ol><li>");
- // }
- // continue;
- // }
- // }
- // // }
- // }
- // break;
-
- // case 'i': // <ol> list
- // if (getList('i', "<ol class=\"roman\">", "</ol>")) {
- // continue;
- // }
- // break;
- // case 'I': // <ol> list
- // if (getList('i', "<ol class=\"ROMAN\">", "</ol>")) {
- // continue;
- // }
- // break;
- // case 'a' : // <ol> list
- // if (getList('a', "<ol class=\"alpha\">", "</ol>")) {
- // continue;
- // }
- // break;
- // case 'A' : // <ol> list
- // if (getList('A', "<ol class=\"ALPHA\">", "</ol>")) {
- // continue;
- // }
- // break;
- // case 'g' : // <ol> list
- // if (getList('g', "<ol class=\"greek\">", "</ol>")) {
- // continue;
- // }
- // break;
- // case 'H' : // <ol> list
- // if (getList('H', "<ol class=\"HIRAGANA\">", "</ol>")) {
- // continue;
- // }
- // break;
- // case 'k' : // <ol> list
- // if (getList('k', "<ol class=\"katakana\">", "</ol>")) {
- // continue;
- // }
- // break;
- // case 'K' : // <ol> list
- // if (getList('K', "<ol class=\"KATAKANA\">", "</ol>")) {
- // continue;
- // }
- // break;
- // case 'j' : // <ol> list
- // if (getList('j', "<ol class=\"HEBREW\">", "</ol>")) {
- // continue;
- // }
- // break;
-
case '\'':
if (getNextChar('\'')) {
if (getNextChar('\'')) {
return WikipediaFilter.TokenEM;
}
break;
- // case '_':
- // if (getNextChar('_')) {
- // copyWhite(fWhiteStart, fWhiteStartPosition, 2);
- // fWhiteStart = false;
- // return WikipediaFilter.TokenBOLD;
- // }
- // break;
- // case '~':
- // if (getNextChar('~')) {
- // copyWhite(fWhiteStart, fWhiteStartPosition, 2);
- // fWhiteStart = false;
- // return WikipediaFilter.TokenITALIC;
- // }
- // break;
case '-':
int tempCurrPosition = fCurrentPosition;
try {
} catch (IndexOutOfBoundsException e) {
}
-
- // int levelMinus = getNumberOfChar('-') + 1;
- // if (getNextChar(' ')) {
- // int tempPosition = checkWhitespaces(
- // fWhiteStartPosition, fCurrentPosition - 2
- // - levelMinus);
- // if (tempPosition >= 0) {
- // copyWhite(fWhiteStart, fWhiteStartPosition,
- // 1 + levelMinus);
- // fWhiteStart = false;
- // AbstractTag tok = (AbstractTag) fTokenStack.peek();
- // if (tok instanceof ListToken) {
- // ListToken listToken = (ListToken) tok;
- // int topLevel = listToken.getLevel();
- // if (listToken.getToken() ==
- // WikipediaFilter.TokenLIST_UL_START) {
- // if (levelMinus > topLevel) {
- // fTokenStack
- // .push(new ListToken(
- // WikipediaFilter.TokenLIST_UL_START,
- // topLevel + 1));
- // fResultBuffer
- // .append("<ul class=\"minus\"><li>");
- // } else if (levelMinus < topLevel) {
- // fTokenStack.pop();
- // fResultBuffer
- // .append("</li></ul></li><li>");
- // } else {
- // fResultBuffer.append("</li><li>");
- // }
- // } else {
- // fTokenStack
- // .push(new ListToken(
- // WikipediaFilter.TokenLIST_UL_START,
- // levelMinus));
- // fResultBuffer
- // .append("<ul class=\"minus\"><li>");
- // }
- // } else {
- // fTokenStack
- // .push(new ListToken(
- // WikipediaFilter.TokenLIST_UL_START,
- // 1));
- // fResultBuffer
- // .append("\n<ul class=\"minus\"><li>");
- // }
- // continue;
- // }
- // }
- // if (levelMinus == 2) {
- // copyWhite(fWhiteStart, fWhiteStartPosition, 2);
- // fWhiteStart = false;
- // return WikipediaFilter.TokenSTRIKETHROUGH;
- // }
break;
case 'h': // http(s)://
int urlStartPosition = fCurrentPosition;
if (foundUrl) {
String urlString = new String(fSource, urlStartPosition - 1, fCurrentPosition - urlStartPosition);
fCurrentPosition--;
- WikipediaFilter.createExternalLink(fResultBuffer, fWikiEngine, urlString);
+ createExternalLink(urlString);
continue;
}
break;
}
}
break;
+ case '{':
+ // detect macros
+ copyWhite(fWhiteStart, fWhiteStartPosition, 1);
+ fWhiteStart = false;
+ int startMacroPosition = fCurrentPosition;
+ if (getNextChar('|') && handleWikipediaTable()) { // Wikipedia
+ // table
+ // syntax
+ continue;
+ // } else {
+ // SnipSnap / Radeox Macro Syntax
+ // if (readUntilChar('}')) {
+ // String macroStartTag;
+ //
+ // macroStartTag = new String(fSource, startMacroPosition, fCurrentPosition - startMacroPosition - 1);
+ // if (macroStartTag != null) {
+ // createMacro(startMacroPosition, macroStartTag);
+ // continue;
+ // }
+ // }
+ }
+ break;
case '<':
int htmlStartPosition = fCurrentPosition;
try {
}
}
break;
- // case 'm' : // math
- // String mathString =
- // fStringSource.substring(fCurrentPosition - 1,
- // fCurrentPosition + 5);
-
- // if (mathString.equals("<math>")) {
- // fCurrentPosition += 5;
- // if (readUntilString("</math>")) {
- // String mathContent = new String(fSource,
- // htmlStartPosition + 5, fCurrentPosition -
- // htmlStartPosition - 12);
- // if (mathContent != null) {
- // copyWhite(fWhiteStart, fWhiteStartPosition,
- // fCurrentPosition - htmlStartPosition + 1);
- // fWhiteStart = false;
- // if (startOfIndent) {
- // startOfIndent = false;
- // handleTeXMath(mathContent, false);
- // } else {
- // handleTeXMath(mathContent, true);
- // }
- // continue;
- // }
- // }
- // }
- // break;
case 'n': // nowiki
String nowikiString = fStringSource.substring(fCurrentPosition - 1, fCurrentPosition + 7);
break;
}
} catch (IndexOutOfBoundsException e) {
-
+ // do nothing
}
startOfIndent = false;
fCurrentPosition = htmlStartPosition;
}
fResultBuffer.append("<span class=\"nobr\">");
fResultBuffer.append("<a href=\"");
- fResultBuffer.append(Encoder.escape(urlString));
+// fResultBuffer.append(Encoder.escape(urlString));
+ copyWhite(urlString);
fResultBuffer.append("\">");
- fResultBuffer.append(Encoder.toEntity(alias.charAt(0)) + alias.substring(1));
+// fResultBuffer.append(Encoder.toEntity(alias.charAt(0)) + alias.substring(1));
+ copyWhite(alias);
fResultBuffer.append("</a></span>");
}
// else {
// fCurrentPosition = temp - 1;
// return false;
// }
+ public void createExternalLink(String urlString) { // renders urlString as an HTML anchor appended to fResultBuffer; the URL serves as both link target and link text
+ // Does our engine know images? If so, the engine's external-link image markup is emitted before the anchor.
+ if (fWikiEngine instanceof ImageRenderEngine) {
+ fResultBuffer.append(((ImageRenderEngine) fWikiEngine).getExternalImageLink());
+ }
+ fResultBuffer.append("<span class=\"nobr\">");
+ fResultBuffer.append("<a href=\"");
+// fResultBuffer.append(Encoder.escape(urlString));
+ copyWhite(urlString); // href value; NOTE(review): copyWhite(String) presumably HTML-escapes the text while appending it to fResultBuffer - confirm
+ fResultBuffer.append("\">");
+// fResultBuffer.append(Encoder.escape(urlString));
+ copyWhite(urlString); // the same URL again, this time as the visible link text
+ fResultBuffer.append("</a></span>");
+ }
+
private void handleWikipediaLink(String linkText, String suffix) {
String name = linkText;
if (name != null) {
// Configuration probably wrote [http://radeox.org] instead of
// http://radeox.org
if (index != -1) {
- WikipediaFilter.createExternalLink(fResultBuffer, fWikiEngine, name.substring(index));
+ createExternalLink(name.substring(index));
// show error
// fResult.append("<div class=\"error\">Do not surround URLs
// with [...].</div>");
// name = name.substring(colonIndex + 1);
// }
- int atIndex = name.lastIndexOf('@');
+// int atIndex = name.lastIndexOf('@');
// InterWiki link ?
- if (-1 != atIndex) {
- String extSpace = name.substring(atIndex + 1);
- // known extarnal space ?
- InterWiki interWiki = InterWiki.getInstance();
- if (interWiki.contains(extSpace)) {
- name = name.substring(0, atIndex);
- Writer writer = new StringBufferWriter(fResultBuffer);
- try {
- if (-1 != hashIndex) {
- interWiki.expand(writer, extSpace, name, hash);
- } else {
- interWiki.expand(writer, extSpace, name, "");
- }
- } catch (IOException e) {
- // log.debug("InterWiki " + extSpace + " not found.");
- }
- } else {
- fResultBuffer.append("[<span class=\"error\">");
- fResultBuffer.append(name);
- fResultBuffer.append("?</span>]");
- }
- } else {
+// if (-1 != atIndex) {
+// String extSpace = name.substring(atIndex + 1);
+// // known extarnal space ?
+// InterWiki interWiki = InterWiki.getInstance();
+// if (interWiki.contains(extSpace)) {
+// name = name.substring(0, atIndex);
+// Writer writer = new StringBufferWriter(fResultBuffer);
+// try {
+// if (-1 != hashIndex) {
+// interWiki.expand(writer, extSpace, name, hash);
+// } else {
+// interWiki.expand(writer, extSpace, name, "");
+// }
+// } catch (IOException e) {
+// // log.debug("InterWiki " + extSpace + " not found.");
+// }
+// } else {
+// fResultBuffer.append("[<span class=\"error\">");
+// fResultBuffer.append(name);
+// fResultBuffer.append("?</span>]");
+// }
+// } else {
// internal link
+ name = Encoder.escape(name);
if (name.startsWith("Image:") && (fWikiEngine instanceof ImageRenderEngine)) {
// server part of rendering images
ImageRenderEngine imageEngine = (ImageRenderEngine) fWikiEngine;
// cannot display/create wiki, so just display the text
fResultBuffer.append(name);
}
- }
+// }
}
}
}
if (fResultBufferHeader != null) {
int tocStart = fResultBufferHeader.length();
- fResultBufferHeader.append("<table id=\"toc\" border=\"0\"><tr><th>Table of contents</th></tr><tr><td>");
- fResultBufferHeader.append("<ol>");
- createToC(fTableOfContent);
- fResultBufferHeader.append("</ol>");
- fResultBufferHeader.append("</td></tr></table><hr/>");
+ if (isToC(fTableOfContent) > 3) {
+ fResultBufferHeader.append("<table id=\"toc\" border=\"0\"><tr><th>Table of contents</th></tr><tr><td>");
+ fResultBufferHeader.append("<ol>");
+ createToC(fTableOfContent);
+ fResultBufferHeader.append("</ol>");
+ fResultBufferHeader.append("</td></tr></table><hr/>");
+ }
fResultBufferHeader.append(fResultBuffer);
fResultBuffer = fResultBufferHeader;
}
}
+ /**
+ * Count the number of wiki headers in this document by counting the leaf
+ * entries of the (possibly nested) table-of-contents list.
+ *
+ * Every element that is itself an ArrayList is descended into recursively and
+ * contributes the count of its own leaf entries; every other element counts
+ * as one. Despite the "is" prefix, this method returns a count, not a boolean.
+ *
+ * @param toc
+ *          the table-of-contents list; its elements are either nested
+ *          ArrayLists or other objects (presumably the header entries -
+ *          TODO confirm against the code that fills the list)
+ * @return the number of non-ArrayList elements found in toc and in all lists
+ *         nested inside it; 0 for an empty list
+ */
+ private int isToC(ArrayList toc) {
+
+ if (toc.size() == 1 && (toc.get(0) instanceof ArrayList)) { // the only element is a nested list: count that list directly
+ return isToC((ArrayList) toc.get(0));
+ }
+ int result = 0;
+ for (int i = 0; i < toc.size(); i++) {
+ if (toc.get(i) instanceof ArrayList) {
+ result += isToC((ArrayList) toc.get(i)); // nested list: add the count of its leaf entries
+ } else {
+ result++; // any non-list element counts as one entry
+ }
+ }
+ return result;
+ }
+
private void createToC(ArrayList toc) {
if (toc.size() == 1 && (toc.get(0) instanceof ArrayList)) {
createToC((ArrayList) toc.get(0));