package net.sourceforge.phpeclipse.wiki.actions.mediawiki.connect; //Parts of this sources are copied and modified from the jEdit Wikipedia plugin: //http://www.djini.de/software/wikipedia/index.html // //The modified sources are available under the "Common Public License" //with permission from the original author: Daniel Wunsch import java.io.IOException; import java.io.StringReader; import java.io.UnsupportedEncodingException; import java.net.URLDecoder; import java.util.ArrayList; import java.util.regex.Matcher; import java.util.regex.Pattern; import net.sourceforge.phpeclipse.wiki.actions.mediawiki.config.IWikipedia; import net.sourceforge.phpeclipse.wiki.actions.mediawiki.exceptions.MethodException; import net.sourceforge.phpeclipse.wiki.actions.mediawiki.exceptions.PageNotEditableException; import net.sourceforge.phpeclipse.wiki.actions.mediawiki.exceptions.UnexpectedAnswerException; import net.sourceforge.phpeclipse.wiki.editor.WikiEditorPlugin; import org.apache.commons.httpclient.ConnectMethod; import org.apache.commons.httpclient.HostConfiguration; import org.apache.commons.httpclient.HttpClient; import org.apache.commons.httpclient.HttpConnection; import org.apache.commons.httpclient.HttpException; import org.apache.commons.httpclient.HttpMethod; import org.apache.commons.httpclient.HttpState; import org.apache.commons.httpclient.HttpStatus; import org.apache.commons.httpclient.MultiThreadedHttpConnectionManager; import org.apache.commons.httpclient.NameValuePair; import org.apache.commons.httpclient.URI; import org.apache.commons.httpclient.UsernamePasswordCredentials; import org.apache.commons.httpclient.methods.GetMethod; import org.apache.commons.httpclient.methods.PostMethod; import org.apache.commons.httpclient.protocol.Protocol; import org.apache.commons.httpclient.util.EncodingUtil; import org.eclipse.core.runtime.CoreException; import org.eclipse.core.runtime.Preferences; /** * This class gets the wikitext from a wikipedia edit page * * The basic coding was copied from the commons-httpclient example MediaWikiConnector.java */ public class MediaWikiConnector { //pattern used to scarp an edit page private static final Pattern BODY_PATTERN = Pattern.compile( /* * action=".*?title=(.*?)(&|\")
*/ ".*]*\\sid=\"editform\"[^>]*title=(.*?)&[^>]*>" + ".*]*\\sname=\"wpTextbox1\"[^>]*>(.*?)" + ".*]*\\svalue=\"(\\d*)\"[^>]*\\sname=\"wpEdittime\"[^>]*>" + ".*", Pattern.DOTALL); // private static final Pattern EDIT_TOKEN = Pattern.compile( ".*.*", Pattern.DOTALL); //setup default user agent final static public String userAgent = "plog4u.org/0.0"; // create a ConnectionManager private MultiThreadedHttpConnectionManager manager; private HttpClient client; /** * Delay a new store to 1 second */ private Throttle loadThrottle = new Throttle(WikiEditorPlugin.HTTP_GET_THROTTLE); /** * Delay a new store to 1 second */ private Throttle storeThrottle = new Throttle(WikiEditorPlugin.HTTP_PUT_THROTTLE); class Throttle { private long nextTime = 0; private final long minimumDelay; public Throttle(long minimumDelay) { this.minimumDelay = minimumDelay; } public Throttle(String key) { Preferences prefs = WikiEditorPlugin.getDefault().getPluginPreferences(); String minimumDelay = prefs.getString(key); this.minimumDelay = Integer.parseInt(minimumDelay); } /** this is called from the client */ public synchronized void delay() throws InterruptedException { long delay = nextTime - System.currentTimeMillis(); if (delay > 0) Thread.sleep(delay); nextTime = System.currentTimeMillis() + minimumDelay; } } public MediaWikiConnector() { // bookmarklet manager = new MultiThreadedHttpConnectionManager(); manager.setMaxConnectionsPerHost(6); manager.setMaxTotalConnections(18); manager.setConnectionStaleCheckingEnabled(true); // open the conversation client = new HttpClient(manager); setHTTPClientParameters(client); //client.State.CookiePolicy = CookiePolicy.COMPATIBILITY; //client.HostConfiguration.setHost(LOGON_SITE, LOGON_PORT, "http"); } /** destructor freeing all resources. the Connection is not usable any more after calling this method */ public void destroy() { manager.shutdown(); } /** log in - returns success */ public boolean login(IWikipedia config, String actionUrl, String user, String password, boolean remember) throws UnexpectedAnswerException, MethodException { PostMethod method = new PostMethod(actionUrl); method.setFollowRedirects(false); method.addRequestHeader("User-Agent", userAgent); NameValuePair[] params = new NameValuePair[] { new NameValuePair("title", config.getLoginTitle()), new NameValuePair("action", "submit"), new NameValuePair("wpName", user), new NameValuePair("wpPassword", password), new NameValuePair("wpRemember", remember ? "1" : "0"), new NameValuePair("wpLoginattempt", "submit") }; method.addParameters(params); boolean result; try { int responseCode = client.executeMethod(method); String responseBody = method.getResponseBodyAsString(); //### debugging //log(responseBody); // log(method); if (responseCode == 302 && responseBody.length() == 0 || responseCode == 200 && responseBody.matches(config.getLoginSuccess())) { result = true; } else if (responseCode == 200 && responseBody.matches(config.getLoginWrongPw()) || responseCode == 200 && responseBody.matches(config.getLoginNoUser())) { result = false; if (responseBody.matches(config.getLoginNoUser())) { throw new UnexpectedAnswerException("login not successful: wrong user name: " + user); } else if (responseBody.matches(config.getLoginWrongPw())) { throw new UnexpectedAnswerException("login not successful: wrong password for user: " + user); } else { throw new UnexpectedAnswerException("logout not successful: responseCode == 200"); } } else { throw new UnexpectedAnswerException("login not successful: " + method.getStatusLine()); } } catch (HttpException e) { throw new MethodException("method failed", e); } catch (IOException e) { throw new MethodException("method failed", e); } finally { method.releaseConnection(); } /* * // display cookies System.err.println("login: " + result); for (var cookie : client.State.Cookies) { * System.err.println("cookie: " + cookie); } */ // remember state SiteState state = SiteState.siteState(config); state.loggedIn = result; state.userName = user; return result; } /** log out - return success */ public boolean logout(IWikipedia config, String actionUrl) throws UnexpectedAnswerException, MethodException { GetMethod method = new GetMethod(actionUrl); method.setFollowRedirects(false); method.addRequestHeader("User-Agent", userAgent); NameValuePair[] params = new NameValuePair[] { new NameValuePair("title", config.getLogoutTitle()), new NameValuePair("action", "submit") }; method.setQueryString(EncodingUtil.formUrlEncode(params, config.getCharSet())); boolean result; try { int responseCode = client.executeMethod(method); String responseBody = method.getResponseBodyAsString(); // log(method); if (responseCode == 302 && responseBody.length() == 0 || responseCode == 200 && responseBody.matches(config.getLogoutSuccess())) { // config.getloggedIn = false; result = true; } else if (responseCode == 200) { //### should check for a failure message result = false; throw new UnexpectedAnswerException("logout not successful: responseCode == 200"); } else { throw new UnexpectedAnswerException("logout not successful: " + method.getStatusLine()); } } catch (HttpException e) { throw new MethodException("method failed", e); } catch (IOException e) { throw new MethodException("method failed", e); } finally { method.releaseConnection(); } // remember state SiteState state = SiteState.siteState(config); state.loggedIn = false; return result; } /** parses a returned editform for the sessions wpEditToken */ private String parseEditToken(String charSet, String responseBody) throws PageNotEditableException { Matcher matcher = EDIT_TOKEN.matcher(responseBody); if (!matcher.matches()) { return null; } return matcher.group(1); } /** * returns the edit token or null if no token is available * * @param actionURL * @param charSet * @param title * @return * @throws UnexpectedAnswerException * @throws MethodException * @throws PageNotEditableException */ public String loadEditToken(String actionURL, String charSet, String title) throws UnexpectedAnswerException, MethodException, PageNotEditableException { GetMethod method = new GetMethod(actionURL); method.setFollowRedirects(false); method.addRequestHeader("User-Agent", userAgent); NameValuePair[] params = new NameValuePair[] { new NameValuePair("title", title), new NameValuePair("action", "edit") }; method.setQueryString(EncodingUtil.formUrlEncode(params, charSet)); try { int responseCode = client.executeMethod(method); String responseBody = method.getResponseBodyAsString(); // log(method); if (responseCode == 200) { String parsed = parseEditToken(charSet, responseBody); if (parsed != null && parsed.length() == 0) { return null; } return parsed; } else { throw new UnexpectedAnswerException("load not successful: expected 200 OK, got " + method.getStatusLine()); } } catch (HttpException e) { throw new MethodException("method failed", e); } catch (IOException e) { throw new MethodException("method failed", e); } finally { method.releaseConnection(); } } /** parses a returned editform into a Content object with UNIX-EOLs ("\n") */ private Parsed parseBody(String charSet, String responseBody) throws PageNotEditableException, UnsupportedEncodingException { Matcher matcher = BODY_PATTERN.matcher(responseBody); if (!matcher.matches()) throw new PageNotEditableException("cannot find editform form"); String title = matcher.group(1); String body = matcher.group(2); String timestamp = matcher.group(3); String tokenEdit = null; // String tokenEdit = matcher.group(4); title = URLDecoder.decode(title, charSet); body = body.replaceAll(""", "\"").replaceAll("'", "'").replaceAll("<", "<").replaceAll(">", ">").replaceAll( "&", "&").replaceAll("\r\n", "\n").replace('\r', '\n'); return new Parsed(timestamp, title, body, tokenEdit); } /** load a Page Version - returns a Loaded Object */ public Loaded load(String actionURL, String charSet, String title) throws UnexpectedAnswerException, MethodException, PageNotEditableException { GetMethod method = new GetMethod(actionURL); method.setFollowRedirects(false); method.addRequestHeader("User-Agent", userAgent); NameValuePair[] params = new NameValuePair[] { new NameValuePair("title", title), new NameValuePair("action", "edit") }; method.setQueryString(EncodingUtil.formUrlEncode(params, charSet)); Loaded result; try { int responseCode = client.executeMethod(method); String responseBody = method.getResponseBodyAsString(); // log(method); if (responseCode == 200) { Parsed parsed = parseBody(charSet, responseBody); Content content = new Content(parsed.timestamp, parsed.body); result = new Loaded(actionURL, charSet, parsed.title, content); } else { throw new UnexpectedAnswerException("load not successful: expected 200 OK, got " + method.getStatusLine()); } } catch (HttpException e) { throw new MethodException("method failed", e); } catch (IOException e) { throw new MethodException("method failed", e); } finally { method.releaseConnection(); } return result; } public String loadHTMLPage(String actionURL, String charSet) throws UnexpectedAnswerException, MethodException, PageNotEditableException { GetMethod method = new GetMethod(actionURL); method.setFollowRedirects(true); method.addRequestHeader("User-Agent", userAgent); // NameValuePair[] params = new NameValuePair[] { new NameValuePair("title", title) }; // method.setQueryString(EncodingUtil.formUrlEncode(params, charSet)); String result = ""; try { int responseCode = client.executeMethod(method); String responseBody = method.getResponseBodyAsString(); // log(method); if (responseCode == 200) { result = responseBody; // Parsed parsed = parseBody(charSet, responseBody); // Content content = new Content(parsed.timestamp, parsed.body); // result = new Loaded(actionURL, charSet, parsed.title, content); } else { throw new UnexpectedAnswerException("load not successful: expected 200 OK, got " + method.getStatusLine()); } } catch (HttpException e) { throw new MethodException("method failed", e); } catch (IOException e) { throw new MethodException("method failed", e); } finally { method.releaseConnection(); } return result; } public ArrayList loadXML(IWikipedia config, String actionURL, String pages) throws UnexpectedAnswerException, MethodException, InterruptedException { loadThrottle.delay(); PostMethod method = new PostMethod(actionURL); method.setFollowRedirects(false); method.addRequestHeader("User-Agent", userAgent); method.addRequestHeader("Content-Type", PostMethod.FORM_URL_ENCODED_CONTENT_TYPE + "; charset=" + config.getCharSet()); NameValuePair[] params = new NameValuePair[] { new NameValuePair("pages", pages), new NameValuePair("curonly", "X"), new NameValuePair("action", "submit") }; method.addParameters(params); try { int responseCode = client.executeMethod(method); String responseBody = method.getResponseBodyAsString(); if (responseCode == 200) { StringReader reader = new StringReader(responseBody); return XMLReader.readFromStream(reader); } else { throw new UnexpectedAnswerException("XML load not successful: expected 200 OK, got " + method.getStatusLine()); } } catch (CoreException e) { throw new UnexpectedAnswerException("XML load method failed" + e.getMessage()); } catch (HttpException e) { throw new MethodException("XML load method failed", e); } catch (IOException e) { throw new MethodException("XML load method failed", e); } finally { method.releaseConnection(); } } /** * store a Page Version - returns a Stored object * * @param config - * WiKipedia predefined properties * @param actionURL * @param title * @param content * @param summary * @param minorEdit * @param watchThis * @return * @throws UnexpectedAnswerException * @throws MethodException * @throws PageNotEditableException * @throws InterruptedException */ public Stored store(IWikipedia config, String editToken, String actionUrl, String title, Content content, String summary, boolean minorEdit, boolean watchThis) throws UnexpectedAnswerException, MethodException, PageNotEditableException, InterruptedException { //### workaround: prevent too many stores at a time storeThrottle.delay(); PostMethod method = new PostMethod(actionUrl); method.setFollowRedirects(false); method.addRequestHeader("User-Agent", userAgent); method.addRequestHeader("Content-Type", PostMethod.FORM_URL_ENCODED_CONTENT_TYPE + "; charset=" + config.getCharSet()); if (editToken == null) { // in some versions editToken isn't supported editToken = " "; } NameValuePair[] params = new NameValuePair[] { // new NameValuePair("wpSection", ""), // new NameValuePair("wpPreview", "Vorschau zeigen"), // new NameValuePair("wpSave", "Artikel speichern"), new NameValuePair("title", title), new NameValuePair("wpTextbox1", content.body), new NameValuePair("wpEdittime", content.timestamp), new NameValuePair("wpSummary", summary), new NameValuePair("wpEditToken", editToken), new NameValuePair("wpSave", "yes"), new NameValuePair("action", "submit") }; method.addParameters(params); if (minorEdit) method.addParameter("wpMinoredit", "1"); if (watchThis) method.addParameter("wpWatchthis", "1"); Stored result; try { int responseCode = client.executeMethod(method); String responseBody = method.getResponseBodyAsString(); // log(method); // since 11dec04 there is a single linefeed instead of an empty page.. trim() helps. if (responseCode == 302 && responseBody.trim().length() == 0) { // log("store successful, reloading"); Loaded loaded = load(actionUrl, config.getCharSet(), title); result = new Stored(actionUrl, config.getCharSet(), loaded.title, loaded.content, false); } else if (responseCode == 200) { // log("store not successful, conflict detected"); Parsed parsed = parseBody(config.getCharSet(), responseBody); Content cont = new Content(parsed.timestamp, parsed.body); result = new Stored(actionUrl, config.getCharSet(), parsed.title, cont, true); } else { throw new UnexpectedAnswerException("store not successful: expected 200 OK, got " + method.getStatusLine()); } } catch (HttpException e) { throw new MethodException("method failed", e); } catch (IOException e) { throw new MethodException("method failed", e); } finally { method.releaseConnection(); } return result; } /** * Get the text of a wikimedia article * */ public String getWikiRawText(String wikiname, String urlStr) { // examples // http://en.wikipedia.org/w/wiki.phtml?title=Main_Page&action=raw // http://en.wikibooks.org/w/index.php?title=Programming:PHP:SQL_Injection&action=raw // http://en.wikipedia.org/w/wiki.phtml?title=Talk:Division_by_zero&action=raw HttpMethod method = null; try { if (urlStr == null) { WikiEditorPlugin.getDefault().reportError("No Wikipedia URL configured", "URL-String == null"); // urlStr = "http://en.wikipedia.org/w/wiki.phtml?title=" + wikiname + "&action=raw"; } URI uri = new URI(urlStr.toCharArray()); String schema = uri.getScheme(); if ((schema == null) || (schema.equals(""))) { schema = "http"; } Protocol protocol = Protocol.getProtocol(schema); method = new GetMethod(uri.toString()); String host = uri.getHost(); int port = uri.getPort(); HttpConnection connection = new HttpConnection(host, port, protocol); HttpState state = setHTTPParameters(connection); if (connection.isProxied() && connection.isSecure()) { method = new ConnectMethod(method); } method.execute(state, connection); // client.executeMethod(method); if (method.getStatusCode() == HttpStatus.SC_OK) { // get the wiki text now: String wikiText = method.getResponseBodyAsString(); return wikiText; } } catch (Throwable e) { WikiEditorPlugin.log(e); WikiEditorPlugin.getDefault().reportError("Exception occured", e.getMessage() + "\nSee stacktrace in /.metadata/.log file."); } finally { if (method != null) { method.releaseConnection(); } } return null; // no success in getting wiki text } // public static String getWikiEditTextarea(String wikiname, String urlStr) { // // examples // // http://en.wikipedia.org/w/wiki.phtml?title=Main_Page&action=edit // // http://en.wikibooks.org/w/wiki.phtml?title=Programming:PHP:SQL_Injection&action=edit // // http://en.wikipedia.org/w/wiki.phtml?title=Talk:Division_by_zero&action=edit // HttpMethod method = null; // try { // if (urlStr == null) { // urlStr = "http://en.wikipedia.org/w/wiki.phtml?title=" + wikiname + "&action=edit"; // } // // else { // // urlStr = urlStr + "?title=" + wikiname + "&action=edit"; // // } // URI uri = new URI(urlStr.toCharArray()); // // String schema = uri.getScheme(); // if ((schema == null) || (schema.equals(""))) { // schema = "http"; // } // Protocol protocol = Protocol.getProtocol(schema); // // HttpState state = new HttpState(); // // method = new GetMethod(uri.toString()); // String host = uri.getHost(); // int port = uri.getPort(); // // HttpConnection connection = new HttpConnection(host, port, protocol); // // connection.setProxyHost(System.getProperty("http.proxyHost")); // connection.setProxyPort(Integer.parseInt(System.getProperty("http.proxyPort", "80"))); // // if (System.getProperty("http.proxyUserName") != null) { // state.setProxyCredentials(null, null, new UsernamePasswordCredentials(System.getProperty("http.proxyUserName"), System // .getProperty("http.proxyPassword"))); // } // // if (connection.isProxied() && connection.isSecure()) { // method = new ConnectMethod(method); // } // // method.execute(state, connection); // // if (method.getStatusCode() == HttpStatus.SC_OK) { // // get the textareas wiki text now: // InputStream stream = method.getResponseBodyAsStream(); // int byteLen = stream.available(); // int count = 1; // byte[] buffer = new byte[byteLen]; // stream.read(buffer, 0, byteLen); // String wikiText = new String(buffer); // // String wikiText = method.getResponseBodyAsString(); // int start = wikiText.indexOf("", start + 1); // if (start != (-1)) { // int end = wikiText.indexOf(""); // wikiText = wikiText.substring(start + 1, end); // } // } // return wikiText; // // System.out.println(wikiText); // // } // } catch (Exception e) { // e.printStackTrace(); // } finally { // if (method != null) { // method.releaseConnection(); // } // } // return null; // no success in getting wiki text // } /** * @param state * @param connection */ private HttpState setHTTPParameters(HttpConnection connection) { HttpState state = new HttpState(); Preferences prefs = WikiEditorPlugin.getDefault().getPluginPreferences(); String timeout = prefs.getString(WikiEditorPlugin.HTTP_TIMEOUT); String proxyHost = prefs.getString(WikiEditorPlugin.HTTP_PROXYHOST); try { // timeout after xx seconds connection.setConnectionTimeout(Integer.parseInt(timeout)); if (proxyHost.length() > 0) { String proxyPort = prefs.getString(WikiEditorPlugin.HTTP_PROXYPORT); connection.setProxyHost(proxyHost); connection.setProxyPort(Integer.parseInt(proxyPort)); String proxyUserName = prefs.getString(WikiEditorPlugin.HTTP_PROXYUSERNAME); if (proxyUserName.length() > 0) { String proxyPassWord = prefs.getString(WikiEditorPlugin.HTTP_PROXYPASSWORD); state.setProxyCredentials(null, null, new UsernamePasswordCredentials(proxyUserName, proxyPassWord)); } } } catch (Exception e) { } return state; } private void setHTTPClientParameters(HttpClient client) { Preferences prefs = WikiEditorPlugin.getDefault().getPluginPreferences(); String timeout = prefs.getString(WikiEditorPlugin.HTTP_TIMEOUT); String proxyHost = prefs.getString(WikiEditorPlugin.HTTP_PROXYHOST); try { // timeout after xx seconds client.setConnectionTimeout(Integer.parseInt(timeout)); if (proxyHost.length() > 0) { String proxyPort = prefs.getString(WikiEditorPlugin.HTTP_PROXYPORT); HostConfiguration conf = new HostConfiguration(); client.setHostConfiguration(conf); conf.setProxy(proxyHost, Integer.parseInt(proxyPort)); String proxyUserName = prefs.getString(WikiEditorPlugin.HTTP_PROXYUSERNAME); if (proxyUserName.length() > 0) { HttpState state = new HttpState(); String proxyPassWord = prefs.getString(WikiEditorPlugin.HTTP_PROXYPASSWORD); state.setProxyCredentials(null, null, new UsernamePasswordCredentials(proxyUserName, proxyPassWord)); client.setState(state); } } } catch (Exception e) { } } public static void main(String[] args) { MediaWikiConnector mwc = new MediaWikiConnector(); try { IWikipedia wp = null; ArrayList list = mwc.loadXML(wp, "http://www.plog4u.de/wiki/index.php/Spezial:Export", "Mechanisches Fernsehen\nSynästhesie"); for (int i = 0; i < list.size(); i++) { System.out.println(list.get(i).toString()); } } catch (UnexpectedAnswerException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); } } }