X-Git-Url: http://secure.phpeclipse.com diff --git a/net.sourceforge.phpeclipse.ui/src/net/sourceforge/phpdt/internal/ui/text/spelling/engine/AbstractSpellDictionary.java b/net.sourceforge.phpeclipse.ui/src/net/sourceforge/phpdt/internal/ui/text/spelling/engine/AbstractSpellDictionary.java new file mode 100644 index 0000000..8e0bab5 --- /dev/null +++ b/net.sourceforge.phpeclipse.ui/src/net/sourceforge/phpdt/internal/ui/text/spelling/engine/AbstractSpellDictionary.java @@ -0,0 +1,445 @@ +/******************************************************************************* + * Copyright (c) 2000, 2003 IBM Corporation and others. + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Common Public License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/legal/cpl-v10.html + * + * Contributors: + * IBM Corporation - initial API and implementation + *******************************************************************************/ + +package net.sourceforge.phpdt.internal.ui.text.spelling.engine; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.net.MalformedURLException; +import java.net.URL; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +/** + * Partial implementation of a spell dictionary. + * + * @since 3.0 + */ +public abstract class AbstractSpellDictionary implements ISpellDictionary { + + /** The bucket capacity */ + protected static final int BUCKET_CAPACITY = 4; + + /** The word buffer capacity */ + protected static final int BUFFER_CAPACITY = 32; + + /** The distance threshold */ + protected static final int DISTANCE_THRESHOLD = 160; + + /** The hash capacity */ + protected static final int HASH_CAPACITY = 22 * 1024; + + /** The phonetic distance algorithm */ + private IPhoneticDistanceAlgorithm fDistanceAlgorithm = new DefaultPhoneticDistanceAlgorithm(); + + /** The mapping from phonetic hashes to word lists */ + private final Map fHashBuckets = new HashMap(HASH_CAPACITY); + + /** The phonetic hash provider */ + private IPhoneticHashProvider fHashProvider = new DefaultPhoneticHashProvider(); + + /** Is the dictionary already loaded? */ + private boolean fLoaded = false; + + /** + * Returns all candidates with the same phonetic hash. + * + * @param hash + * The hash to retrieve the candidates of + * @return Array of candidates for the phonetic hash + */ + protected final ArrayList getCandidates(final String hash) { + + ArrayList list = (ArrayList) fHashBuckets.get(hash); + if (list == null) + list = new ArrayList(0); + + return list; + } + + /** + * Returns all candidates that have a phonetic hash within a bounded + * distance to the specified word. + * + * @param word + * The word to find the nearest matches for + * @param sentence + * true iff the proposals start a new sentence, + * false otherwise + * @param hashs + * Array of close hashes to find the matches + * @return Set of ranked words with bounded distance to the specified word + */ + protected final HashSet getCandidates(final String word, + final boolean sentence, final ArrayList hashs) { + + int distance = 0; + String hash = null; + + String candidate = null; + List candidates = null; + + final StringBuffer buffer = new StringBuffer(BUFFER_CAPACITY); + final HashSet result = new HashSet(BUCKET_CAPACITY * hashs.size()); + + for (int index = 0; index < hashs.size(); index++) { + + hash = (String) hashs.get(index); + candidates = getCandidates(hash); + + for (int offset = 0; offset < candidates.size(); offset++) { + + candidate = (String) candidates.get(offset); + distance = fDistanceAlgorithm.getDistance(word, candidate); + + if (distance < DISTANCE_THRESHOLD) { + + buffer.setLength(0); + buffer.append(candidate); + + if (sentence) + buffer.setCharAt(0, Character.toUpperCase(buffer + .charAt(0))); + + result.add(new RankedWordProposal(buffer.toString(), + -distance)); + } + } + } + return result; + } + + /** + * Returns all approximations that have a phonetic hash with smallest + * possible distance to the specified word. + * + * @param word + * The word to find the nearest matches for + * @param sentence + * true iff the proposals start a new sentence, + * false otherwise + * @param result + * Set of ranked words with smallest possible distance to the + * specified word + */ + protected final void getCandidates(final String word, + final boolean sentence, final HashSet result) { + + int distance = 0; + int minimum = Integer.MAX_VALUE; + + String candidate = null; + StringBuffer buffer = new StringBuffer(BUFFER_CAPACITY); + + final ArrayList candidates = getCandidates(fHashProvider.getHash(word)); + final ArrayList matches = new ArrayList(candidates.size()); + + for (int index = 0; index < candidates.size(); index++) { + + candidate = (String) candidates.get(index); + distance = fDistanceAlgorithm.getDistance(word, candidate); + + if (distance <= minimum) { + + buffer.setLength(0); + buffer.append(candidate); + + if (sentence) + buffer + .setCharAt(0, Character.toUpperCase(buffer + .charAt(0))); + + matches + .add(new RankedWordProposal(buffer.toString(), + -distance)); + minimum = distance; + } + } + + RankedWordProposal match = null; + + for (int index = 0; index < matches.size(); index++) { + + match = (RankedWordProposal) matches.get(index); + if (match.getRank() == minimum) + result.add(match); + } + } + + /** + * Returns the used phonetic distance algorithm. + * + * @return The phonetic distance algorithm + */ + protected final IPhoneticDistanceAlgorithm getDistanceAlgorithm() { + return fDistanceAlgorithm; + } + + /** + * Returns the used phonetic hash provider. + * + * @return The phonetic hash provider + */ + protected final IPhoneticHashProvider getHashProvider() { + return fHashProvider; + } + + /* + * @see net.sourceforge.phpdt.internal.ui.text.spelling.engine.ISpellDictionary#getProposals(java.lang.String,boolean) + */ + public Set getProposals(final String word, final boolean sentence) { + + try { + + if (!fLoaded) + load(getURL()); + + } catch (MalformedURLException exception) { + // Do nothing + } + + final String hash = fHashProvider.getHash(word); + final char[] mutators = fHashProvider.getMutators(); + + final ArrayList neighborhood = new ArrayList((word.length() + 1) + * (mutators.length + 2)); + neighborhood.add(hash); + + final HashSet candidates = getCandidates(word, sentence, neighborhood); + neighborhood.clear(); + + char previous = 0; + char next = 0; + + char[] characters = word.toCharArray(); + for (int index = 0; index < word.length() - 1; index++) { + + next = characters[index]; + previous = characters[index + 1]; + + characters[index] = previous; + characters[index + 1] = next; + + neighborhood.add(fHashProvider.getHash(new String(characters))); + + characters[index] = next; + characters[index + 1] = previous; + } + + final String sentinel = word + " "; //$NON-NLS-1$ + + characters = sentinel.toCharArray(); + int offset = characters.length - 1; + + while (true) { + + for (int index = 0; index < mutators.length; index++) { + + characters[offset] = mutators[index]; + neighborhood.add(fHashProvider.getHash(new String(characters))); + } + + if (offset == 0) + break; + + characters[offset] = characters[offset - 1]; + --offset; + } + + char mutated = 0; + characters = word.toCharArray(); + + for (int index = 0; index < word.length(); index++) { + + mutated = characters[index]; + for (int mutator = 0; mutator < mutators.length; mutator++) { + + characters[index] = mutators[mutator]; + neighborhood.add(fHashProvider.getHash(new String(characters))); + } + characters[index] = mutated; + } + + characters = word.toCharArray(); + final char[] deleted = new char[characters.length - 1]; + + for (int index = 0; index < deleted.length; index++) + deleted[index] = characters[index]; + + next = characters[characters.length - 1]; + offset = deleted.length; + + while (true) { + + neighborhood.add(fHashProvider.getHash(new String(characters))); + if (offset == 0) + break; + + previous = next; + next = deleted[offset - 1]; + + deleted[offset - 1] = previous; + --offset; + } + + neighborhood.remove(hash); + final HashSet matches = getCandidates(word, sentence, neighborhood); + + if (matches.size() == 0 && candidates.size() == 0) + getCandidates(word, sentence, candidates); + + candidates.addAll(matches); + + return candidates; + } + + /** + * Returns the URL of the dictionary word list. + * + * @throws MalformedURLException + * if the URL could not be retrieved + * @return The URL of the dictionary word list + */ + protected abstract URL getURL() throws MalformedURLException; + + /** + * Hashes the word into the dictionary. + * + * @param word + * The word to hash in the dictionary + */ + protected final void hashWord(final String word) { + + final String hash = fHashProvider.getHash(word); + ArrayList bucket = (ArrayList) fHashBuckets.get(hash); + + if (bucket == null) { + + bucket = new ArrayList(BUCKET_CAPACITY); + fHashBuckets.put(hash, bucket); + } + + bucket.add(word); + } + + /* + * @see net.sourceforge.phpdt.internal.ui.text.spelling.engine.ISpellDictionary#isCorrect(java.lang.String) + */ + public boolean isCorrect(final String word) { + + try { + + if (!fLoaded) + load(getURL()); + + } catch (MalformedURLException exception) { + // Do nothing + } + + final ArrayList candidates = getCandidates(fHashProvider.getHash(word)); + + if (candidates.contains(word) + || candidates.contains(word.toLowerCase())) + return true; + + return false; + } + + /* + * @see net.sourceforge.phpdt.ui.text.spelling.engine.ISpellDictionary#isLoaded() + */ + public final synchronized boolean isLoaded() { + return fLoaded || fHashBuckets.size() > 0; + } + + /** + * Loads a dictionary word list from disk. + * + * @param url + * The URL of the word list to load + * @return true iff the word list could be loaded, + * false otherwise + */ + protected synchronized boolean load(final URL url) { + + if (url != null) { + + try { + + final InputStream stream = url.openStream(); + if (stream != null) { + + String word = null; + + final BufferedReader reader = new BufferedReader( + new InputStreamReader(stream)); + while ((word = reader.readLine()) != null) + hashWord(word); + + return fLoaded = true; + } + } catch (IOException exception) { + // Do nothing + } + } + return false; + } + + /** + * Sets the phonetic distance algorithm to use. + * + * @param algorithm + * The phonetic distance algorithm + */ + protected final void setDistanceAlgorithm( + final IPhoneticDistanceAlgorithm algorithm) { + fDistanceAlgorithm = algorithm; + } + + /** + * Sets the phonetic hash provider to use. + * + * @param provider + * The phonetic hash provider + */ + protected final void setHashProvider(final IPhoneticHashProvider provider) { + fHashProvider = provider; + } + + /* + * @see net.sourceforge.phpdt.ui.text.spelling.engine.ISpellDictionary#unload() + */ + public synchronized void unload() { + + fLoaded = false; + fHashBuckets.clear(); + } + + /* + * @see net.sourceforge.phpdt.ui.text.spelling.engine.ISpellDictionary#acceptsWords() + */ + public boolean acceptsWords() { + return false; + } + + /* + * @see net.sourceforge.phpdt.internal.ui.text.spelling.engine.ISpellDictionary#addWord(java.lang.String) + */ + public void addWord(final String word) { + // Do nothing + } +}