1 /*******************************************************************************
2 * Copyright (c) 2000, 2003 IBM Corporation and others.
3 * All rights reserved. This program and the accompanying materials
4 * are made available under the terms of the Common Public License v1.0
5 * which accompanies this distribution, and is available at
6 * http://www.eclipse.org/legal/cpl-v10.html
9 * IBM Corporation - initial API and implementation
10 *******************************************************************************/
12 package net.sourceforge.phpdt.internal.ui.text.spelling.engine;
14 import java.io.BufferedReader;
15 import java.io.IOException;
16 import java.io.InputStream;
17 import java.io.InputStreamReader;
18 import java.net.MalformedURLException;
20 import java.util.ArrayList;
21 import java.util.HashMap;
22 import java.util.HashSet;
23 import java.util.List;
28 * Partial implementation of a spell dictionary.
32 public abstract class AbstractSpellDictionary implements ISpellDictionary {
34 /** The bucket capacity */
35 protected static final int BUCKET_CAPACITY= 4;
37 /** The word buffer capacity */
38 protected static final int BUFFER_CAPACITY= 32;
40 /** The distance threshold */
41 protected static final int DISTANCE_THRESHOLD= 160;
43 /** The hash capacity */
44 protected static final int HASH_CAPACITY= 22 * 1024;
46 /** The phonetic distance algorithm */
47 private IPhoneticDistanceAlgorithm fDistanceAlgorithm= new DefaultPhoneticDistanceAlgorithm();
49 /** The mapping from phonetic hashes to word lists */
50 private final Map fHashBuckets= new HashMap(HASH_CAPACITY);
52 /** The phonetic hash provider */
53 private IPhoneticHashProvider fHashProvider= new DefaultPhoneticHashProvider();
55 /** Is the dictionary already loaded? */
56 private boolean fLoaded= false;
59 * Returns all candidates with the same phonetic hash.
62 * The hash to retrieve the candidates of
63 * @return Array of candidates for the phonetic hash
65 protected final ArrayList getCandidates(final String hash) {
67 ArrayList list= (ArrayList)fHashBuckets.get(hash);
69 list= new ArrayList(0);
75 * Returns all candidates that have a phonetic hash within a bounded
76 * distance to the specified word.
79 * The word to find the nearest matches for
81 * <code>true</code> iff the proposals start a new sentence,
82 * <code>false</code> otherwise
84 * Array of close hashes to find the matches
85 * @return Set of ranked words with bounded distance to the specified word
87 protected final HashSet getCandidates(final String word, final boolean sentence, final ArrayList hashs) {
92 String candidate= null;
93 List candidates= null;
95 final StringBuffer buffer= new StringBuffer(BUFFER_CAPACITY);
96 final HashSet result= new HashSet(BUCKET_CAPACITY * hashs.size());
98 for (int index= 0; index < hashs.size(); index++) {
100 hash= (String)hashs.get(index);
101 candidates= getCandidates(hash);
103 for (int offset= 0; offset < candidates.size(); offset++) {
105 candidate= (String)candidates.get(offset);
106 distance= fDistanceAlgorithm.getDistance(word, candidate);
108 if (distance < DISTANCE_THRESHOLD) {
111 buffer.append(candidate);
114 buffer.setCharAt(0, Character.toUpperCase(buffer.charAt(0)));
116 result.add(new RankedWordProposal(buffer.toString(), -distance));
124 * Returns all approximations that have a phonetic hash with smallest
125 * possible distance to the specified word.
128 * The word to find the nearest matches for
130 * <code>true</code> iff the proposals start a new sentence,
131 * <code>false</code> otherwise
133 * Set of ranked words with smallest possible distance to the
136 protected final void getCandidates(final String word, final boolean sentence, final HashSet result) {
139 int minimum= Integer.MAX_VALUE;
141 String candidate= null;
142 StringBuffer buffer= new StringBuffer(BUFFER_CAPACITY);
144 final ArrayList candidates= getCandidates(fHashProvider.getHash(word));
145 final ArrayList matches= new ArrayList(candidates.size());
147 for (int index= 0; index < candidates.size(); index++) {
149 candidate= (String)candidates.get(index);
150 distance= fDistanceAlgorithm.getDistance(word, candidate);
152 if (distance <= minimum) {
155 buffer.append(candidate);
158 buffer.setCharAt(0, Character.toUpperCase(buffer.charAt(0)));
160 matches.add(new RankedWordProposal(buffer.toString(), -distance));
165 RankedWordProposal match= null;
167 for (int index= 0; index < matches.size(); index++) {
169 match= (RankedWordProposal)matches.get(index);
170 if (match.getRank() == minimum)
176 * Returns the used phonetic distance algorithm.
178 * @return The phonetic distance algorithm
180 protected final IPhoneticDistanceAlgorithm getDistanceAlgorithm() {
181 return fDistanceAlgorithm;
185 * Returns the used phonetic hash provider.
187 * @return The phonetic hash provider
189 protected final IPhoneticHashProvider getHashProvider() {
190 return fHashProvider;
194 * @see org.eclipse.jdt.internal.ui.text.spelling.engine.ISpellDictionary#getProposals(java.lang.String,boolean)
196 public Set getProposals(final String word, final boolean sentence) {
203 } catch (MalformedURLException exception) {
207 final String hash= fHashProvider.getHash(word);
208 final char[] mutators= fHashProvider.getMutators();
210 final ArrayList neighborhood= new ArrayList((word.length() + 1) * (mutators.length + 2));
211 neighborhood.add(hash);
213 final HashSet candidates= getCandidates(word, sentence, neighborhood);
214 neighborhood.clear();
219 char[] characters= word.toCharArray();
220 for (int index= 0; index < word.length() - 1; index++) {
222 next= characters[index];
223 previous= characters[index + 1];
225 characters[index]= previous;
226 characters[index + 1]= next;
228 neighborhood.add(fHashProvider.getHash(new String(characters)));
230 characters[index]= next;
231 characters[index + 1]= previous;
234 final String sentinel= word + " "; //$NON-NLS-1$
236 characters= sentinel.toCharArray();
237 int offset= characters.length - 1;
241 for (int index= 0; index < mutators.length; index++) {
243 characters[offset]= mutators[index];
244 neighborhood.add(fHashProvider.getHash(new String(characters)));
250 characters[offset]= characters[offset - 1];
255 characters= word.toCharArray();
257 for (int index= 0; index < word.length(); index++) {
259 mutated= characters[index];
260 for (int mutator= 0; mutator < mutators.length; mutator++) {
262 characters[index]= mutators[mutator];
263 neighborhood.add(fHashProvider.getHash(new String(characters)));
265 characters[index]= mutated;
268 characters= word.toCharArray();
269 final char[] deleted= new char[characters.length - 1];
271 for (int index= 0; index < deleted.length; index++)
272 deleted[index]= characters[index];
274 next= characters[characters.length - 1];
275 offset= deleted.length;
279 neighborhood.add(fHashProvider.getHash(new String(characters)));
284 next= deleted[offset - 1];
286 deleted[offset - 1]= previous;
290 neighborhood.remove(hash);
291 final HashSet matches= getCandidates(word, sentence, neighborhood);
293 if (matches.size() == 0 && candidates.size() == 0)
294 getCandidates(word, sentence, candidates);
296 candidates.addAll(matches);
302 * Returns the URL of the dictionary word list.
304 * @throws MalformedURLException
305 * if the URL could not be retrieved
306 * @return The URL of the dictionary word list
308 protected abstract URL getURL() throws MalformedURLException;
311 * Hashes the word into the dictionary.
314 * The word to hash in the dictionary
316 protected final void hashWord(final String word) {
318 final String hash= fHashProvider.getHash(word);
319 ArrayList bucket= (ArrayList)fHashBuckets.get(hash);
321 if (bucket == null) {
323 bucket= new ArrayList(BUCKET_CAPACITY);
324 fHashBuckets.put(hash, bucket);
331 * @see org.eclipse.jdt.internal.ui.text.spelling.engine.ISpellDictionary#isCorrect(java.lang.String)
333 public boolean isCorrect(final String word) {
340 } catch (MalformedURLException exception) {
344 final ArrayList candidates= getCandidates(fHashProvider.getHash(word));
346 if (candidates.contains(word) || candidates.contains(word.toLowerCase()))
353 * @see org.eclipse.jdt.ui.text.spelling.engine.ISpellDictionary#isLoaded()
355 public final synchronized boolean isLoaded() {
356 return fLoaded || fHashBuckets.size() > 0;
360 * Loads a dictionary word list from disk.
363 * The URL of the word list to load
364 * @return <code>true</code> iff the word list could be loaded, <code>false</code>
367 protected synchronized boolean load(final URL url) {
373 final InputStream stream= url.openStream();
374 if (stream != null) {
378 final BufferedReader reader= new BufferedReader(new InputStreamReader(stream));
379 while ((word= reader.readLine()) != null)
382 return fLoaded= true;
384 } catch (IOException exception) {
392 * Sets the phonetic distance algorithm to use.
395 * The phonetic distance algorithm
397 protected final void setDistanceAlgorithm(final IPhoneticDistanceAlgorithm algorithm) {
398 fDistanceAlgorithm= algorithm;
402 * Sets the phonetic hash provider to use.
405 * The phonetic hash provider
407 protected final void setHashProvider(final IPhoneticHashProvider provider) {
408 fHashProvider= provider;
412 * @see org.eclipse.jdt.ui.text.spelling.engine.ISpellDictionary#unload()
414 public synchronized void unload() {
417 fHashBuckets.clear();
421 * @see org.eclipse.jdt.ui.text.spelling.engine.ISpellDictionary#acceptsWords()
423 public boolean acceptsWords() {
428 * @see org.eclipse.jdt.internal.ui.text.spelling.engine.ISpellDictionary#addWord(java.lang.String)
430 public void addWord(final String word) {